[clang] [llvm] [mlir] Added Intrinsics for smed, umed, to support ISA instructions from ROCDL (PR #157748)
Keshav Vinayak Jha via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 9 13:59:06 PDT 2025
https://github.com/keshavvinayak01 created https://github.com/llvm/llvm-project/pull/157748
None
>From 49f6936ece68e845d956efe3e04855e49955bb5b Mon Sep 17 00:00:00 2001
From: keshavvinayak01 <keshavvinayakjha at gmail.com>
Date: Tue, 9 Sep 2025 20:50:03 +0000
Subject: [PATCH] Added Intrinsics for smed, umed, to support ISA instructions
from ROCDL
Signed-off-by: keshavvinayak01 <keshavvinayakjha at gmail.com>
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 4 +
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 8 +
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 12 ++
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 4 +-
.../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 172 ++++++++++++++++++
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 19 +-
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 22 +++
.../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 3 +
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 88 +++++++++
9 files changed, 323 insertions(+), 9 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index e5a1422fe8778..b923c519c56cd 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -125,6 +125,8 @@ BUILTIN(__builtin_amdgcn_ds_bpermute, "iii", "nc")
BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
BUILTIN(__builtin_amdgcn_fmed3f, "ffff", "nc")
+BUILTIN(__builtin_amdgcn_smed3, "iiii", "nc")
+BUILTIN(__builtin_amdgcn_umed3, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_ds_faddf, "ff*3fIiIiIb", "n")
BUILTIN(__builtin_amdgcn_ds_fminf, "ff*3fIiIiIb", "n")
BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*3fIiIiIb", "n")
@@ -251,6 +253,8 @@ TARGET_BUILTIN(__builtin_amdgcn_perm, "UiUiUiUi", "nc", "gfx8-insts")
//===----------------------------------------------------------------------===//
TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts")
+TARGET_BUILTIN(__builtin_amdgcn_smed3h, "ssss", "nc", "gfx9-insts")
+TARGET_BUILTIN(__builtin_amdgcn_umed3h, "UsUsUsUs", "nc", "gfx9-insts")
TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f64, "dd*1d", "t", "gfx90a-insts")
TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "t", "atomic-fadd-rtn-insts")
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 87a46287c4022..b189fd745aa2d 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -548,6 +548,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_fmed3h:
return emitBuiltinWithOneOverloadedType<3>(*this, E,
Intrinsic::amdgcn_fmed3);
+ case AMDGPU::BI__builtin_amdgcn_smed3:
+ case AMDGPU::BI__builtin_amdgcn_smed3h:
+ return emitBuiltinWithOneOverloadedType<3>(*this, E,
+ Intrinsic::amdgcn_smed3);
+ case AMDGPU::BI__builtin_amdgcn_umed3:
+ case AMDGPU::BI__builtin_amdgcn_umed3h:
+ return emitBuiltinWithOneOverloadedType<3>(*this, E,
+ Intrinsic::amdgcn_umed3);
case AMDGPU::BI__builtin_amdgcn_ds_append:
case AMDGPU::BI__builtin_amdgcn_ds_consume: {
Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 5bbc16f2dc743..00e017040ec1e 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -531,6 +531,18 @@ def int_amdgcn_fmed3 :
[IntrNoMem, IntrSpeculatable]
>;
+// Median of three integers using signed comparison. Overloaded on the integer
+// type: the result and all three operands share the same type.
+def int_amdgcn_smed3 :
+ DefaultAttrsIntrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]
+>;
+
+// Median of three integers using unsigned comparison. Overloaded on the
+// integer type: the result and all three operands share the same type.
+def int_amdgcn_umed3 :
+ DefaultAttrsIntrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]
+>;
+
def int_amdgcn_cubeid : ClangBuiltin<"__builtin_amdgcn_cubeid">,
DefaultAttrsIntrinsic<[llvm_float_ty],
[llvm_float_ty, llvm_float_ty, llvm_float_ty],
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 0c112d1787c1a..b76956b6b4d24 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -256,8 +256,8 @@ def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE2, AMDGPUcvt_f32_ubyte2>;
def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE3, AMDGPUcvt_f32_ubyte3>;
def : GINodeEquiv<G_AMDGPU_CVT_PK_I16_I32, AMDGPUpk_i16_i32_impl>;
-def : GINodeEquiv<G_AMDGPU_SMED3, AMDGPUsmed3>;
-def : GINodeEquiv<G_AMDGPU_UMED3, AMDGPUumed3>;
+def : GINodeEquiv<G_AMDGPU_SMED3, AMDGPUsmed3_impl>;
+def : GINodeEquiv<G_AMDGPU_UMED3, AMDGPUumed3_impl>;
def : GINodeEquiv<G_AMDGPU_FMED3, AMDGPUfmed3_impl>;
def : GINodeEquiv<G_AMDGPU_CLAMP, AMDGPUclamp>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 4fe5d00679436..1efa5b9861188 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -60,6 +60,26 @@ static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
return maxnum(Src0, Src1);
}
+// Constant fold llvm.amdgcn.smed3: yields the median of the three operands
+// under signed comparison.
+static APInt smed3AMDGCN(const APInt &Src0, const APInt &Src1,
+                         const APInt &Src2) {
+  // Order the first two operands, then clamp the third between them.
+  const bool FirstIsLarger = Src0.sgt(Src1);
+  const APInt &Lo = FirstIsLarger ? Src1 : Src0;
+  const APInt &Hi = FirstIsLarger ? Src0 : Src1;
+
+  if (Src2.sgt(Hi))
+    return Hi;
+  if (Lo.sgt(Src2))
+    return Lo;
+  return Src2;
+}
+
+// Constant fold llvm.amdgcn.umed3: yields the median of the three operands
+// under unsigned comparison.
+static APInt umed3AMDGCN(const APInt &Src0, const APInt &Src1,
+                         const APInt &Src2) {
+  // Order the first two operands, then clamp the third between them.
+  const bool FirstIsLarger = Src0.ugt(Src1);
+  const APInt &Lo = FirstIsLarger ? Src1 : Src0;
+  const APInt &Hi = FirstIsLarger ? Src0 : Src1;
+
+  if (Src2.ugt(Hi))
+    return Hi;
+  if (Lo.ugt(Src2))
+    return Lo;
+  return Src2;
+}
+
// Check if a value can be converted to a 16-bit value without losing
// precision.
// The value is expected to be either a float (IsFloat = true) or an unsigned
@@ -427,6 +447,36 @@ static Value *matchFPExtFromF16(Value *Arg) {
return nullptr;
}
+/// Match a single-use sext whose source is i16, or a constant integer whose
+/// value is representable as a signed 16-bit integer.
+///
+/// \returns the i16 source of the sext, an i16 constant holding the truncated
+/// value, or nullptr when \p Arg matches neither form.
+static Value *matchSExtFromI16(Value *Arg) {
+  Value *Src = nullptr;
+  ConstantInt *CInt = nullptr;
+  if (match(Arg, m_OneUse(m_SExt(m_Value(Src))))) {
+    if (Src->getType()->isIntegerTy(16))
+      return Src;
+  } else if (match(Arg, m_ConstantInt(CInt))) {
+    // Narrow the constant only if it needs at most 16 bits including the sign
+    // bit. APInt::getMinSignedBits() was renamed to getSignificantBits(); the
+    // old spelling is no longer available.
+    if (CInt->getValue().getSignificantBits() <= 16)
+      return ConstantInt::get(Type::getInt16Ty(Arg->getContext()),
+                              CInt->getValue().trunc(16));
+  }
+  return nullptr;
+}
+
+/// Match a single-use zext whose source is i16, or a constant integer whose
+/// value fits in 16 bits when treated as unsigned. Yields the i16 source, an
+/// i16 constant with the truncated value, or nullptr if nothing matched.
+static Value *matchZExtFromI16(Value *Arg) {
+  Value *Narrow = nullptr;
+  if (match(Arg, m_OneUse(m_ZExt(m_Value(Narrow)))))
+    return Narrow->getType()->isIntegerTy(16) ? Narrow : nullptr;
+
+  ConstantInt *C = nullptr;
+  if (!match(Arg, m_ConstantInt(C)))
+    return nullptr;
+
+  // Reject constants that need more than 16 bits to represent.
+  const APInt &Val = C->getValue();
+  if (Val.getActiveBits() > 16)
+    return nullptr;
+  return ConstantInt::get(Type::getInt16Ty(Arg->getContext()), Val.trunc(16));
+}
+
// Trim all zero components from the end of the vector \p UseV and return
// an appropriate bitset with known elements.
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV,
@@ -1174,6 +1224,128 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
}
+ case Intrinsic::amdgcn_smed3: {
+ Value *Src0 = II.getArgOperand(0);
+ Value *Src1 = II.getArgOperand(1);
+ Value *Src2 = II.getArgOperand(2);
+
+ // Propagate poison values.
+ for (Value *Src : {Src0, Src1, Src2}) {
+ if (isa<PoisonValue>(Src))
+ return IC.replaceInstUsesWith(II, Src);
+ }
+
+ bool Swap = false;
+ // Canonicalize constants to RHS operands.
+ //
+ // smed3(c0, x, c1) -> smed3(x, c0, c1)
+ if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
+ std::swap(Src0, Src1);
+ Swap = true;
+ }
+
+ if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
+ std::swap(Src1, Src2);
+ Swap = true;
+ }
+
+ if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
+ std::swap(Src0, Src1);
+ Swap = true;
+ }
+
+ if (Swap) {
+ II.setArgOperand(0, Src0);
+ II.setArgOperand(1, Src1);
+ II.setArgOperand(2, Src2);
+ return &II;
+ }
+
+ // Constant fold smed3 with constant operands.
+ if (const ConstantInt *C0 = dyn_cast<ConstantInt>(Src0)) {
+ if (const ConstantInt *C1 = dyn_cast<ConstantInt>(Src1)) {
+ if (const ConstantInt *C2 = dyn_cast<ConstantInt>(Src2)) {
+ APInt Result = smed3AMDGCN(C0->getValue(), C1->getValue(), C2->getValue());
+ return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
+ }
+ }
+ }
+
+ // Width reduction for integer extensions.
+ // smed3((sext X), (sext Y), (sext Z)) -> sext (smed3(X, Y, Z))
+ if (Value *X = matchSExtFromI16(Src0)) {
+ if (Value *Y = matchSExtFromI16(Src1)) {
+ if (Value *Z = matchSExtFromI16(Src2)) {
+ Value *NewCall = IC.Builder.CreateIntrinsic(
+ IID, {X->getType()}, {X, Y, Z}, &II, II.getName());
+ return new SExtInst(NewCall, II.getType());
+ }
+ }
+ }
+
+ break;
+ }
+ case Intrinsic::amdgcn_umed3: {
+ Value *Src0 = II.getArgOperand(0);
+ Value *Src1 = II.getArgOperand(1);
+ Value *Src2 = II.getArgOperand(2);
+
+ // Propagate poison values.
+ for (Value *Src : {Src0, Src1, Src2}) {
+ if (isa<PoisonValue>(Src))
+ return IC.replaceInstUsesWith(II, Src);
+ }
+
+ bool Swap = false;
+ // Canonicalize constants to RHS operands.
+ //
+ // umed3(c0, x, c1) -> umed3(x, c0, c1)
+ if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
+ std::swap(Src0, Src1);
+ Swap = true;
+ }
+
+ if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
+ std::swap(Src1, Src2);
+ Swap = true;
+ }
+
+ if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
+ std::swap(Src0, Src1);
+ Swap = true;
+ }
+
+ if (Swap) {
+ II.setArgOperand(0, Src0);
+ II.setArgOperand(1, Src1);
+ II.setArgOperand(2, Src2);
+ return &II;
+ }
+
+ // Constant fold umed3 with constant operands.
+ if (const ConstantInt *C0 = dyn_cast<ConstantInt>(Src0)) {
+ if (const ConstantInt *C1 = dyn_cast<ConstantInt>(Src1)) {
+ if (const ConstantInt *C2 = dyn_cast<ConstantInt>(Src2)) {
+ APInt Result = umed3AMDGCN(C0->getValue(), C1->getValue(), C2->getValue());
+ return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
+ }
+ }
+ }
+
+ // Width reduction for integer extensions.
+ // umed3((zext X), (zext Y), (zext Z)) -> zext (umed3(X, Y, Z))
+ if (Value *X = matchZExtFromI16(Src0)) {
+ if (Value *Y = matchZExtFromI16(Src1)) {
+ if (Value *Z = matchZExtFromI16(Src2)) {
+ Value *NewCall = IC.Builder.CreateIntrinsic(
+ IID, {X->getType()}, {X, Y, Z}, &II, II.getName());
+ return new ZExtInst(NewCall, II.getType());
+ }
+ }
+ }
+
+ break;
+ }
case Intrinsic::amdgcn_icmp:
case Intrinsic::amdgcn_fcmp: {
const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index b8fa6f3fc6867..e9680e062cffa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -334,16 +334,13 @@ def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp,
[]
>;
-def AMDGPUsmed3 : SDNode<"AMDGPUISD::SMED3", AMDGPUDTIntTernaryOp,
- []
->;
-
-def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp,
- []
->;
def AMDGPUfmed3_impl : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>;
+// SelectionDAG node for AMDGPUISD::SMED3; selection patterns should use the
+// AMDGPUsmed3 PatFrags, which also matches the llvm.amdgcn.smed3 intrinsic.
+def AMDGPUsmed3_impl : SDNode<"AMDGPUISD::SMED3", AMDGPUDTIntTernaryOp, []>;
+
+// SelectionDAG node for AMDGPUISD::UMED3; selection patterns should use the
+// AMDGPUumed3 PatFrags, which also matches the llvm.amdgcn.umed3 intrinsic.
+def AMDGPUumed3_impl : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp, []>;
+
def AMDGPUfdot2_impl : SDNode<"AMDGPUISD::FDOT2",
SDTypeProfile<1, 4, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>,
SDTCisFP<0>, SDTCisVec<1>,
@@ -448,6 +445,14 @@ def AMDGPUfmed3 : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(int_amdgcn_fmed3 node:$src0, node:$src1, node:$src2),
(AMDGPUfmed3_impl node:$src0, node:$src1, node:$src2)]>;
+// Matches a signed median-of-three in either form: the llvm.amdgcn.smed3
+// intrinsic or the AMDGPUISD::SMED3 node (AMDGPUsmed3_impl).
+def AMDGPUsmed3 : PatFrags<(ops node:$src0, node:$src1, node:$src2),
+ [(int_amdgcn_smed3 node:$src0, node:$src1, node:$src2),
+ (AMDGPUsmed3_impl node:$src0, node:$src1, node:$src2)]>;
+
+// Matches an unsigned median-of-three in either form: the llvm.amdgcn.umed3
+// intrinsic or the AMDGPUISD::UMED3 node (AMDGPUumed3_impl).
+def AMDGPUumed3 : PatFrags<(ops node:$src0, node:$src1, node:$src2),
+ [(int_amdgcn_umed3 node:$src0, node:$src1, node:$src2),
+ (AMDGPUumed3_impl node:$src0, node:$src1, node:$src2)]>;
+
def AMDGPUdiv_fixup : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(int_amdgcn_div_fixup node:$src0, node:$src1, node:$src2),
(AMDGPUdiv_fixup_impl node:$src0, node:$src1, node:$src2)]>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index f18536cd4ab93..5b91da5ef81fb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -7797,6 +7797,28 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MI.removeOperand(1);
Observer.changedInstr(MI);
return true;
+ }
+ case Intrinsic::amdgcn_smed3: {
+ GISelChangeObserver &Observer = Helper.Observer;
+
+ // FIXME: This is to workaround the inability of tablegen match combiners to
+ // match intrinsics in patterns.
+ Observer.changingInstr(MI);
+ MI.setDesc(B.getTII().get(AMDGPU::G_AMDGPU_SMED3));
+ MI.removeOperand(1);
+ Observer.changedInstr(MI);
+ return true;
+ }
+ case Intrinsic::amdgcn_umed3: {
+ GISelChangeObserver &Observer = Helper.Observer;
+
+ // FIXME: This is to workaround the inability of tablegen match combiners to
+ // match intrinsics in patterns.
+ Observer.changingInstr(MI);
+ MI.setDesc(B.getTII().get(AMDGPU::G_AMDGPU_UMED3));
+ MI.removeOperand(1);
+ Observer.changedInstr(MI);
+ return true;
}
case Intrinsic::amdgcn_readlane:
case Intrinsic::amdgcn_writelane:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 36b27bef350ed..63141d065bf65 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4136,6 +4136,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
case AMDGPU::G_AMDGPU_SMED3:
+ case AMDGPU::G_AMDGPU_UMED3:
case AMDGPU::G_AMDGPU_FMED3:
return getDefaultMappingVOP(MI);
case AMDGPU::G_UMULH:
@@ -4660,6 +4661,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_bf16:
case Intrinsic::amdgcn_sat_pk4_i4_i8:
case Intrinsic::amdgcn_sat_pk4_u4_u8:
+ case Intrinsic::amdgcn_smed3:
+ case Intrinsic::amdgcn_umed3:
case Intrinsic::amdgcn_fmed3:
case Intrinsic::amdgcn_cubeid:
case Intrinsic::amdgcn_cubema:
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 9fa3ec1fc4b21..5e757ae337879 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -1291,6 +1291,94 @@ def ROCDL_CvtScaleF32PkFp4F32Op :
}];
}
+//===----------------------------------------------------------------------===//
+// MED3 operations
+//===----------------------------------------------------------------------===//
+
+// Median of three f16 values, translated to the llvm.amdgcn.fmed3 intrinsic.
+// NOTE(review): the base class presumably derives an intrinsic name from the
+// "med3.f16" mnemonic as well; confirm that this does not reference a
+// non-existent llvm::Intrinsic::amdgcn_med3_f16 on the import path.
+def ROCDL_Med3F16Op : ROCDL_ConcreteNonMemIntrOp<"med3.f16", [Pure], 1>,
+  Arguments<(ins F16:$src0,
+                 F16:$src1,
+                 F16:$src2)> {
+  let results = (outs F16:$res);
+  let summary = "Median of three half-precision float values";
+  let assemblyFormat = [{
+    $src0 `,` $src1 `,` $src2 attr-dict `:` `(` type($src0) `,` type($src1) `,` type($src2) `)` `->` type($res)
+  }];
+  // There is no amdgcn_med3_f16 intrinsic; the float median is the overloaded
+  // amdgcn_fmed3, so the result type is passed as its overload type.
+  string llvmBuilder = [{
+    $res = createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_fmed3, {$src0, $src1, $src2}, {$_resultType});
+  }];
+}
+
+// Median of three f32 values, translated to the llvm.amdgcn.fmed3 intrinsic.
+// NOTE(review): the base class presumably derives an intrinsic name from the
+// "med3.f32" mnemonic as well; confirm that this does not reference a
+// non-existent llvm::Intrinsic::amdgcn_med3_f32 on the import path.
+def ROCDL_Med3F32Op : ROCDL_ConcreteNonMemIntrOp<"med3.f32", [Pure], 1>,
+  Arguments<(ins F32:$src0,
+                 F32:$src1,
+                 F32:$src2)> {
+  let results = (outs F32:$res);
+  let summary = "Median of three single-precision float values";
+  let assemblyFormat = [{
+    $src0 `,` $src1 `,` $src2 attr-dict `:` `(` type($src0) `,` type($src1) `,` type($src2) `)` `->` type($res)
+  }];
+  // There is no amdgcn_med3_f32 intrinsic; the float median is the overloaded
+  // amdgcn_fmed3, so the result type is passed as its overload type.
+  string llvmBuilder = [{
+    $res = createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_fmed3, {$src0, $src1, $src2}, {$_resultType});
+  }];
+}
+
+// Median of three i16 values, translated to the llvm.amdgcn.smed3 intrinsic.
+def ROCDL_Med3I16Op : ROCDL_ConcreteNonMemIntrOp<"med3.i16", [Pure], 1>,
+  Arguments<(ins I16:$src0,
+                 I16:$src1,
+                 I16:$src2)> {
+  let results = (outs I16:$res);
+  let summary = "Median of three signed 16-bit integer values";
+  let assemblyFormat = [{
+    $src0 `,` $src1 `,` $src2 attr-dict `:` `(` type($src0) `,` type($src1) `,` type($src2) `)` `->` type($res)
+  }];
+  // amdgcn_smed3 is overloaded on its integer type, so the result type must
+  // be passed as the overload type to select the right declaration.
+  string llvmBuilder = [{
+    $res = createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_smed3, {$src0, $src1, $src2}, {$_resultType});
+  }];
+}
+
+// Median of three i32 values, translated to the llvm.amdgcn.smed3 intrinsic.
+def ROCDL_Med3I32Op : ROCDL_ConcreteNonMemIntrOp<"med3.i32", [Pure], 1>,
+  Arguments<(ins I32:$src0,
+                 I32:$src1,
+                 I32:$src2)> {
+  let results = (outs I32:$res);
+  let summary = "Median of three signed 32-bit integer values";
+  let assemblyFormat = [{
+    $src0 `,` $src1 `,` $src2 attr-dict `:` `(` type($src0) `,` type($src1) `,` type($src2) `)` `->` type($res)
+  }];
+  // amdgcn_smed3 is overloaded on its integer type, so the result type must
+  // be passed as the overload type to select the right declaration.
+  string llvmBuilder = [{
+    $res = createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_smed3, {$src0, $src1, $src2}, {$_resultType});
+  }];
+}
+
+// Median of three i16 values compared as unsigned, translated to the
+// llvm.amdgcn.umed3 intrinsic.
+def ROCDL_Med3U16Op : ROCDL_ConcreteNonMemIntrOp<"med3.u16", [Pure], 1>,
+  Arguments<(ins I16:$src0,
+                 I16:$src1,
+                 I16:$src2)> {
+  let results = (outs I16:$res);
+  let summary = "Median of three unsigned 16-bit integer values";
+  let assemblyFormat = [{
+    $src0 `,` $src1 `,` $src2 attr-dict `:` `(` type($src0) `,` type($src1) `,` type($src2) `)` `->` type($res)
+  }];
+  // amdgcn_umed3 is overloaded on its integer type, so the result type must
+  // be passed as the overload type to select the right declaration.
+  string llvmBuilder = [{
+    $res = createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_umed3, {$src0, $src1, $src2}, {$_resultType});
+  }];
+}
+
+// Median of three i32 values compared as unsigned, translated to the
+// llvm.amdgcn.umed3 intrinsic.
+def ROCDL_Med3U32Op : ROCDL_ConcreteNonMemIntrOp<"med3.u32", [Pure], 1>,
+  Arguments<(ins I32:$src0,
+                 I32:$src1,
+                 I32:$src2)> {
+  let results = (outs I32:$res);
+  let summary = "Median of three unsigned 32-bit integer values";
+  let assemblyFormat = [{
+    $src0 `,` $src1 `,` $src2 attr-dict `:` `(` type($src0) `,` type($src1) `,` type($src2) `)` `->` type($res)
+  }];
+  // amdgcn_umed3 is overloaded on its integer type, so the result type must
+  // be passed as the overload type to select the right declaration.
+  string llvmBuilder = [{
+    $res = createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_umed3, {$src0, $src1, $src2}, {$_resultType});
+  }];
+}
+
//===----------------------------------------------------------------------===//
// ROCDL target attribute.
//===----------------------------------------------------------------------===//
More information about the llvm-commits
mailing list