[llvm] 2e2bbca - AMDGPU/GlobalISel: Start legalizing minimumnum and maximumnum (#140900)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 21 08:00:50 PDT 2025
Author: Matt Arsenault
Date: 2025-05-21T17:00:45+02:00
New Revision: 2e2bbcacf813de52f6e7f48dea67e26de1f1f99e
URL: https://github.com/llvm/llvm-project/commit/2e2bbcacf813de52f6e7f48dea67e26de1f1f99e
DIFF: https://github.com/llvm/llvm-project/commit/2e2bbcacf813de52f6e7f48dea67e26de1f1f99e.diff
LOG: AMDGPU/GlobalISel: Start legalizing minimumnum and maximumnum (#140900)
This is the bare minimum to get the intrinsic to compile for AMDGPU,
and it's not optimal. We need to follow the existing G_FMINNUM/G_FMAXNUM
handling more closely, using custom lowering to handle the IEEE=0 case
better.
Just re-use the existing lowering for the old semantics for
G_FMINNUM/G_FMAXNUM. This does not change G_FMINNUM/G_FMAXNUM's
treatment, nor does it try to handle the general expansion without an
underlying min/max variant (or with G_FMINIMUM/G_FMAXIMUM).
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/maximumnum.ll
llvm/test/CodeGen/AMDGPU/minimumnum.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 72f2ba75c927e..7b18a98d7f3ca 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3221,6 +3221,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FMAXNUM_IEEE:
case TargetOpcode::G_FMINIMUM:
case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FMINIMUMNUM:
+ case TargetOpcode::G_FMAXIMUMNUM:
case TargetOpcode::G_FDIV:
case TargetOpcode::G_FREM:
case TargetOpcode::G_FCEIL:
@@ -4591,6 +4593,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerFCopySign(MI);
case G_FMINNUM:
case G_FMAXNUM:
+ case G_FMINIMUMNUM:
+ case G_FMAXIMUMNUM:
return lowerFMinNumMaxNum(MI);
case G_MERGE_VALUES:
return lowerMergeValues(MI);
@@ -5379,6 +5383,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FMAXNUM_IEEE:
case G_FMINIMUM:
case G_FMAXIMUM:
+ case G_FMINIMUMNUM:
+ case G_FMAXIMUMNUM:
case G_FSHL:
case G_FSHR:
case G_ROTL:
@@ -6090,6 +6096,8 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_FMAXNUM_IEEE:
case TargetOpcode::G_FMINIMUM:
case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FMINIMUMNUM:
+ case TargetOpcode::G_FMAXIMUMNUM:
case TargetOpcode::G_STRICT_FADD:
case TargetOpcode::G_STRICT_FSUB:
case TargetOpcode::G_STRICT_FMUL:
@@ -8139,8 +8147,27 @@ LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
- unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
- TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
+ // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
+ // identical handling. fminimumnum/fmaximumnum also need a path that does
+ // not depend on fminnum/fmaxnum.
+
+ unsigned NewOp;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_FMINNUM:
+ NewOp = TargetOpcode::G_FMINNUM_IEEE;
+ break;
+ case TargetOpcode::G_FMINIMUMNUM:
+ NewOp = TargetOpcode::G_FMINNUM;
+ break;
+ case TargetOpcode::G_FMAXNUM:
+ NewOp = TargetOpcode::G_FMAXNUM_IEEE;
+ break;
+ case TargetOpcode::G_FMAXIMUMNUM:
+ NewOp = TargetOpcode::G_FMAXNUM;
+ break;
+ default:
+ llvm_unreachable("unexpected min/max opcode");
+ }
auto [Dst, Src0, Src1] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Dst);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 7bb461e0a239f..667c466a998e0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -960,6 +960,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
auto &MinNumMaxNum = getActionDefinitionsBuilder({
G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
+ // TODO: These should be custom lowered and are directly legal with IEEE=0
+ auto &MinimumNumMaximumNum =
+ getActionDefinitionsBuilder({G_FMINIMUMNUM, G_FMAXIMUMNUM});
+
if (ST.hasVOP3PInsts()) {
MinNumMaxNum.customFor(FPTypesPK16)
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
@@ -976,6 +980,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0);
}
+ MinimumNumMaximumNum.lower();
+
if (ST.hasVOP3PInsts())
FPOpActions.clampMaxNumElementsStrict(0, S16, 2);
diff --git a/llvm/test/CodeGen/AMDGPU/maximumnum.ll b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
index e299f959edb08..c45d86ce306e7 100644
--- a/llvm/test/CodeGen/AMDGPU/maximumnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
@@ -1,106 +1,209 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-SDAG,GFX900-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-GISEL,GFX900-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-SDAG,GFX950-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-GISEL,GFX950-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16,GFX11-SDAG,GFX11-TRUE16-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16,GFX11-GISEL,GFX11-TRUE16-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-SDAG,GFX11-FAKE16-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-GISEL,GFX11-FAKE16-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-SDAG,GFX12-TRUE16-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-GISEL,GFX12-TRUE16-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16,GFX12-SDAG,GFX12-FAKE16-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16,GFX12-GISEL,GFX12-FAKE16-GISEL %s
define half @v_maximumnum_f16(half %x, half %y) {
-; GFX7-LABEL: v_maximumnum_f16:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-TRUE16-LABEL: v_maximumnum_f16:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_maximumnum_f16:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-TRUE16-LABEL: v_maximumnum_f16:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
-; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-FAKE16-LABEL: v_maximumnum_f16:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
-; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_f16:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_f16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16:
+; GFX12-TRUE16-SDAG: ; %bb.0:
+; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16:
+; GFX12-TRUE16-GISEL: ; %bb.0:
+; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16:
+; GFX12-FAKE16-SDAG: ; %bb.0:
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16:
+; GFX12-FAKE16-GISEL: ; %bb.0:
+; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.maximumnum.f16(half %x, half %y)
ret half %result
}
define half @v_maximumnum_f16_nnan(half %x, half %y) {
-; GFX7-LABEL: v_maximumnum_f16_nnan:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f16_nnan:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f16_nnan:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximumnum_f16_nnan:
; GFX8: ; %bb.0:
@@ -156,13 +259,22 @@ define half @v_maximumnum_f16_nnan(half %x, half %y) {
}
define half @v_maximumnum_f16_1.0(half %x) {
-; GFX7-LABEL: v_maximumnum_f16_1.0:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_max_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f16_1.0:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f16_1.0:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, 1.0
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximumnum_f16_1.0:
; GFX8: ; %bb.0:
@@ -229,57 +341,109 @@ define half @v_maximumnum_f16_1.0(half %x) {
}
define float @v_maximumnum_f32(float %x, float %y) {
-; GFX7-LABEL: v_maximumnum_f32:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_f32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_f32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_f32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_f32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_f32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_f32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_f32:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_f32:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.maximumnum.f32(float %x, float %y)
ret float %result
}
@@ -329,59 +493,113 @@ define float @v_maximumnum_f32_nnan(float %x, float %y) {
}
define double @v_maximumnum_f64(double %x, double %y) {
-; GFX7-LABEL: v_maximumnum_f64:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f64:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_f64:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f64:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f64:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f64:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f64:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX9-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_f64:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_f64:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.maximumnum.f64(double %x, double %y)
ret double %result
}
@@ -707,505 +925,978 @@ define double @v_maximumnum_f64_1.0(double %x) {
}
define half @v_maximumnum_f16_s_v(half inreg %x, half %y) {
-; GFX7-LABEL: v_maximumnum_f16_s_v:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s16
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_max_f32_e32 v0, v1, v0
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f16_s_v:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX8-NEXT: v_max_f16_e32 v0, v1, v0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_f16_s_v:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX900-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX900-NEXT: v_max_f16_e32 v0, v1, v0
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_f16_s_v:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX950-NEXT: v_max_f16_e64 v1, s0, s0
-; GFX950-NEXT: v_max_f16_e32 v0, v1, v0
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f16_s_v:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX10-NEXT: v_max_f16_e32 v0, v1, v0
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-TRUE16-LABEL: v_maximumnum_f16_s_v:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, s0, s0
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.h, v0.l
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_maximumnum_f16_s_v:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, s0, s0
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v1, v0
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-TRUE16-LABEL: v_maximumnum_f16_s_v:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, s0, s0
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.h, v0.l
-; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-FAKE16-LABEL: v_maximumnum_f16_s_v:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, s0, s0
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v1, v0
-; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f16_s_v:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, s16
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f16_s_v:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, s16
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f16_s_v:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f16_s_v:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_f16_s_v:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f16_s_v:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f16_s_v:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f16_s_v:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f16_s_v:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f16_s_v:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16_s_v:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, s0, s0
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.h, v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16_s_v:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, s0, s0
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.h, v0.l
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16_s_v:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16_s_v:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_s_v:
+; GFX12-TRUE16-SDAG: ; %bb.0:
+; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0
+; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.h, v0.l
+; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16_s_v:
+; GFX12-TRUE16-GISEL: ; %bb.0:
+; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s0, s0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.h, v0.l
+; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16_s_v:
+; GFX12-FAKE16-SDAG: ; %bb.0:
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0
+; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v1, v0
+; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16_s_v:
+; GFX12-FAKE16-GISEL: ; %bb.0:
+; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s0, s0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v1, v0
+; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.maximumnum.f16(half %x, half %y)
ret half %result
}
define half @v_maximumnum_f16_v_s(half %x, half inreg %y) {
-; GFX7-LABEL: v_maximumnum_f16_v_s:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s16
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f16_v_s:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_f16_v_s:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX900-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX900-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_f16_v_s:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f16_e64 v1, s0, s0
-; GFX950-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX950-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f16_v_s:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-TRUE16-LABEL: v_maximumnum_f16_v_s:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, s0, s0
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_maximumnum_f16_v_s:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, s0, s0
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-TRUE16-LABEL: v_maximumnum_f16_v_s:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, s0, s0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
-; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-FAKE16-LABEL: v_maximumnum_f16_v_s:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, s0, s0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
-; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f16_v_s:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, s16
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f16_v_s:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, s16
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f16_v_s:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f16_v_s:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_f16_v_s:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f16_v_s:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f16_v_s:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f16_v_s:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f16_v_s:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f16_v_s:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16_v_s:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, s0, s0
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16_v_s:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, s0, s0
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16_v_s:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16_v_s:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_v_s:
+; GFX12-TRUE16-SDAG: ; %bb.0:
+; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16_v_s:
+; GFX12-TRUE16-GISEL: ; %bb.0:
+; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s0, s0
+; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16_v_s:
+; GFX12-FAKE16-SDAG: ; %bb.0:
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16_v_s:
+; GFX12-FAKE16-GISEL: ; %bb.0:
+; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s0, s0
+; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.maximumnum.f16(half %x, half %y)
ret half %result
}
define half @v_maximumnum_f16_s_s(half inreg %x, half inreg %y) {
-; GFX7-LABEL: v_maximumnum_f16_s_s:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, s16
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s17
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f16_s_s:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_e64 v0, s17, s17
-; GFX8-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX8-NEXT: v_max_f16_e32 v0, v1, v0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_f16_s_s:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f16_e64 v0, s17, s17
-; GFX900-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX900-NEXT: v_max_f16_e32 v0, v1, v0
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_f16_s_s:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f16_e64 v0, s1, s1
-; GFX950-NEXT: v_max_f16_e64 v1, s0, s0
-; GFX950-NEXT: v_max_f16_e32 v0, v1, v0
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f16_s_s:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e64 v0, s17, s17
-; GFX10-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX10-NEXT: v_max_f16_e32 v0, v1, v0
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-TRUE16-LABEL: v_maximumnum_f16_s_s:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, s1, s1
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, s0, s0
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.h, v0.l
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_maximumnum_f16_s_s:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v0, s1, s1
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, s0, s0
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v1, v0
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-TRUE16-LABEL: v_maximumnum_f16_s_s:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.l, s1, s1
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, s0, s0
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.h, v0.l
-; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-FAKE16-LABEL: v_maximumnum_f16_s_s:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v0, s1, s1
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, s0, s0
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v1, v0
-; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f16_s_s:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, s16
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, s17
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f16_s_s:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, s16
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, s17
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f16_s_s:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_e64 v0, s17, s17
+; GFX8-SDAG-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f16_s_s:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e64 v0, s16, s16
+; GFX8-GISEL-NEXT: v_max_f16_e64 v1, s17, s17
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_f16_s_s:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v0, s17, s17
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f16_s_s:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e64 v0, s16, s16
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, s17, s17
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f16_s_s:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v0, s1, s1
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f16_s_s:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e64 v0, s0, s0
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, s1, s1
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f16_s_s:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f16_e64 v0, s17, s17
+; GFX10-SDAG-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f16_s_s:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f16_e64 v0, s16, s16
+; GFX10-GISEL-NEXT: v_max_f16_e64 v1, s17, s17
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16_s_s:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.l, s1, s1
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, s0, s0
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.h, v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16_s_s:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.l, s0, s0
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, s1, s1
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16_s_s:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v0, s1, s1
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16_s_s:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v0, s0, s0
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, s1, s1
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_s_s:
+; GFX12-TRUE16-SDAG: ; %bb.0:
+; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, s1, s1
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0
+; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.h, v0.l
+; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16_s_s:
+; GFX12-TRUE16-GISEL: ; %bb.0:
+; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, s0, s0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s1, s1
+; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16_s_s:
+; GFX12-FAKE16-SDAG: ; %bb.0:
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, s1, s1
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0
+; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v1, v0
+; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16_s_s:
+; GFX12-FAKE16-GISEL: ; %bb.0:
+; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, s0, s0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s1, s1
+; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.maximumnum.f16(half %x, half %y)
ret half %result
}
define float @v_maximumnum_f32_s_v(float inreg %x, float %y) {
-; GFX7-LABEL: v_maximumnum_f32_s_v:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16
-; GFX7-NEXT: v_max_f32_e32 v0, v1, v0
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f32_s_v:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16
-; GFX8-NEXT: v_max_f32_e32 v0, v1, v0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_f32_s_v:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX900-NEXT: v_max_f32_e64 v1, s16, s16
-; GFX900-NEXT: v_max_f32_e32 v0, v1, v0
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_f32_s_v:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX950-NEXT: v_max_f32_e64 v1, s0, s0
-; GFX950-NEXT: v_max_f32_e32 v0, v1, v0
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f32_s_v:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f32_e64 v1, s16, s16
-; GFX10-NEXT: v_max_f32_e32 v0, v1, v0
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_f32_s_v:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX11-NEXT: v_max_f32_e64 v1, s0, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_max_f32_e32 v0, v1, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_f32_s_v:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
-; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_max_num_f32_e32 v0, v1, v0
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f32_s_v:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f32_s_v:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f32_s_v:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f32_s_v:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_f32_s_v:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f32_s_v:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f32_s_v:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f32_s_v:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f32_s_v:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f32_s_v:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_f32_s_v:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX11-SDAG-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_f32_s_v:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_f32_s_v:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v1, v0
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_f32_s_v:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v1, v0
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.maximumnum.f32(float %x, float %y)
ret float %result
}
define float @v_maximumnum_f32_v_s(float %x, float inreg %y) {
-; GFX7-LABEL: v_maximumnum_f32_v_s:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f32_v_s:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_f32_v_s:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f32_e64 v1, s16, s16
-; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX900-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_f32_v_s:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f32_e64 v1, s0, s0
-; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX950-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f32_v_s:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e64 v1, s16, s16
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_f32_v_s:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f32_e64 v1, s0, s0
-; GFX11-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_f32_v_s:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0
-; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f32_v_s:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f32_v_s:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f32_v_s:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f32_v_s:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_f32_v_s:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f32_v_s:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f32_v_s:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f32_v_s:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f32_v_s:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f32_v_s:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_f32_v_s:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_f32_v_s:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX11-GISEL-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_f32_v_s:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_f32_v_s:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.maximumnum.f32(float %x, float %y)
ret float %result
}
define float @v_maximumnum_f32_s_s(float inreg %x, float inreg %y) {
-; GFX7-LABEL: v_maximumnum_f32_s_s:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, s17
-; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16
-; GFX7-NEXT: v_max_f32_e32 v0, v1, v0
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f32_s_s:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e64 v0, 1.0, s17
-; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16
-; GFX8-NEXT: v_max_f32_e32 v0, v1, v0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_f32_s_s:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f32_e64 v0, s17, s17
-; GFX900-NEXT: v_max_f32_e64 v1, s16, s16
-; GFX900-NEXT: v_max_f32_e32 v0, v1, v0
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_f32_s_s:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f32_e64 v0, s1, s1
-; GFX950-NEXT: v_max_f32_e64 v1, s0, s0
-; GFX950-NEXT: v_max_f32_e32 v0, v1, v0
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f32_s_s:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e64 v0, s17, s17
-; GFX10-NEXT: v_max_f32_e64 v1, s16, s16
-; GFX10-NEXT: v_max_f32_e32 v0, v1, v0
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_f32_s_s:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f32_e64 v0, s1, s1
-; GFX11-NEXT: v_max_f32_e64 v1, s0, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_max_f32_e32 v0, v1, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_f32_s_s:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f32_e64 v0, s1, s1
-; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_max_num_f32_e32 v0, v1, v0
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f32_s_s:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, s17
+; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f32_s_s:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, s16
+; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s17
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f32_s_s:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, s17
+; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f32_s_s:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, s16
+; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s17
+; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_f32_s_s:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v0, s17, s17
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f32_s_s:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e64 v0, s16, s16
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, s17, s17
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f32_s_s:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v0, s1, s1
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f32_s_s:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e64 v0, s0, s0
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, s1, s1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f32_s_s:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e64 v0, s17, s17
+; GFX10-SDAG-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f32_s_s:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e64 v0, s16, s16
+; GFX10-GISEL-NEXT: v_max_f32_e64 v1, s17, s17
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_f32_s_s:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f32_e64 v0, s1, s1
+; GFX11-SDAG-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_f32_s_s:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f32_e64 v0, s0, s0
+; GFX11-GISEL-NEXT: v_max_f32_e64 v1, s1, s1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_f32_s_s:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v0, s1, s1
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v1, v0
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_f32_s_s:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v0, s0, s0
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, s1, s1
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.maximumnum.f32(float %x, float %y)
ret float %result
}
define double @v_maximumnum_f64_s_v(double inreg %x, double %y) {
-; GFX7-LABEL: v_maximumnum_f64_s_v:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX7-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f64_s_v:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX8-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_f64_s_v:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX900-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_f64_s_v:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
-; GFX950-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
-; GFX950-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f64_s_v:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f64_s_v:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f64_s_v:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f64_s_v:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_f64_s_v:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f64_s_v:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f64_s_v:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f64_s_v:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f64_s_v:
; GFX10: ; %bb.0:
@@ -1241,37 +1932,69 @@ define double @v_maximumnum_f64_s_v(double inreg %x, double %y) {
}
define double @v_maximumnum_f64_v_s(double %x, double inreg %y) {
-; GFX7-LABEL: v_maximumnum_f64_v_s:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f64_v_s:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_f64_v_s:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_f64_v_s:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
-; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX950-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f64_v_s:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f64_v_s:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f64_v_s:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f64_v_s:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_f64_v_s:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f64_v_s:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f64_v_s:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f64_v_s:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f64_v_s:
; GFX10: ; %bb.0:
@@ -1307,184 +2030,354 @@ define double @v_maximumnum_f64_v_s(double %x, double inreg %y) {
}
define double @v_maximumnum_f64_s_s(double inreg %x, double inreg %y) {
-; GFX7-LABEL: v_maximumnum_f64_s_s:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
-; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX7-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f64_s_s:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
-; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX8-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_f64_s_s:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
-; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX900-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_f64_s_s:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
-; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
-; GFX950-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f64_s_s:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
-; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX10-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_f64_s_s:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
-; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_f64_s_s:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f64_e64 v[0:1], s[2:3], s[2:3]
-; GFX12-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[2:3], v[0:1]
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f64_s_s:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f64_s_s:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19]
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f64_s_s:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f64_s_s:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19]
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_f64_s_s:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f64_s_s:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19]
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f64_s_s:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f64_s_s:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], s[0:1], s[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], s[2:3], s[2:3]
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f64_s_s:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
+; GFX10-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f64_s_s:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17]
+; GFX10-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19]
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_f64_s_s:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
+; GFX11-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_f64_s_s:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], s[0:1], s[0:1]
+; GFX11-GISEL-NEXT: v_max_f64 v[2:3], s[2:3], s[2:3]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_f64_s_s:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[0:1], s[2:3], s[2:3]
+; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1]
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[2:3], v[0:1]
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_f64_s_s:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[0:1], s[0:1], s[0:1]
+; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[2:3], s[2:3], s[2:3]
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.maximumnum.f64(double %x, double %y)
ret double %result
}
define float @v_maximumnum_f32_fabs_rhs(float %x, float %y) {
-; GFX7-LABEL: v_maximumnum_f32_fabs_rhs:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f32_fabs_rhs:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_f32_fabs_rhs:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f32_fabs_rhs:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_f32_fabs_rhs:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX11-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_f32_fabs_rhs:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
-; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX11-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
+; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%fabs.y = call float @llvm.fabs.f32(float %y)
%result = call float @llvm.maximumnum.f32(float %x, float %fabs.y)
ret float %result
}
define float @v_maximumnum_f32_fneg_fabs_rhs(float %x, float %y) {
-; GFX7-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e64 v1, -1.0, |v1|
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e64 v1, -1.0, |v1|
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
-; GFX11-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1|
-; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, -1.0, |v1|
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, -1.0, |v1|
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, -1.0, |v1|
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, -1.0, |v1|
+; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1|
+; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1|
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%fabs.y = call float @llvm.fabs.f32(float %y)
%fneg.fabs.y = fneg float %fabs.y
%result = call float @llvm.maximumnum.f32(float %x, float %fneg.fabs.y)
@@ -1492,59 +2385,113 @@ define float @v_maximumnum_f32_fneg_fabs_rhs(float %x, float %y) {
}
define float @v_maximumnum_f32_fabs(float %x, float %y) {
-; GFX7-LABEL: v_maximumnum_f32_fabs:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
-; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f32_fabs:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
-; GFX8-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
-; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_f32_fabs:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-NEXT: v_max_f32_e64 v0, |v0|, |v0|
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f32_fabs:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX10-NEXT: v_max_f32_e64 v0, |v0|, |v0|
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_f32_fabs:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX11-NEXT: v_max_f32_e64 v0, |v0|, |v0|
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_f32_fabs:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
-; GFX12-NEXT: v_max_num_f32_e64 v0, |v0|, |v0|
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f32_fabs:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX7-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f32_fabs:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
+; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f32_fabs:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX8-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
+; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f32_fabs:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
+; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_f32_fabs:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX9-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_f32_fabs:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f32_fabs:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX10-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f32_fabs:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX10-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_f32_fabs:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX11-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_f32_fabs:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX11-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_f32_fabs:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v0, |v0|, |v0|
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_f32_fabs:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v0, |v0|, |v0|
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
%fabs.y = call float @llvm.fabs.f32(float %y)
%result = call float @llvm.maximumnum.f32(float %fabs.x, float %fabs.y)
@@ -1552,59 +2499,113 @@ define float @v_maximumnum_f32_fabs(float %x, float %y) {
}
define float @v_maximumnum_f32_fneg(float %x, float %y) {
-; GFX7-LABEL: v_maximumnum_f32_fneg:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v1, -1.0, v1
-; GFX7-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f32_fneg:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e32 v1, -1.0, v1
-; GFX8-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_f32_fneg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v1, -v1, -v1
-; GFX9-NEXT: v_max_f32_e64 v0, -v0, -v0
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f32_fneg:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e64 v1, -v1, -v1
-; GFX10-NEXT: v_max_f32_e64 v0, -v0, -v0
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_f32_fneg:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f32_e64 v1, -v1, -v1
-; GFX11-NEXT: v_max_f32_e64 v0, -v0, -v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_f32_fneg:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f32_e64 v1, -v1, -v1
-; GFX12-NEXT: v_max_num_f32_e64 v0, -v0, -v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f32_fneg:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, -1.0, v1
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f32_fneg:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, -1.0, v1
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f32_fneg:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, -1.0, v1
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f32_fneg:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, -1.0, v1
+; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_f32_fneg:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX9-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_f32_fneg:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f32_fneg:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX10-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f32_fneg:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX10-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_f32_fneg:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX11-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_f32_fneg:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_f32_fneg:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, -v1, -v1
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v0, -v0, -v0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_f32_fneg:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v0, -v0, -v0
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, -v1, -v1
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%fneg.x = fneg float %x
%fneg.y = fneg float %y
%result = call float @llvm.maximumnum.f32(float %fneg.x, float %fneg.y)
@@ -1612,166 +2613,320 @@ define float @v_maximumnum_f32_fneg(float %x, float %y) {
}
define half @v_maximumnum_f16_fabs_rhs(half %x, half %y) {
-; GFX7-LABEL: v_maximumnum_f16_fabs_rhs:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e64 v1, |v1|
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f16_fabs_rhs:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_f16_fabs_rhs:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f16_fabs_rhs:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-TRUE16-LABEL: v_maximumnum_f16_fabs_rhs:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l|
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_maximumnum_f16_fabs_rhs:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-TRUE16-LABEL: v_maximumnum_f16_fabs_rhs:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
-; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-FAKE16-LABEL: v_maximumnum_f16_fabs_rhs:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
-; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1|
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, |v1|
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX12-TRUE16-SDAG: ; %bb.0:
+; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX12-TRUE16-GISEL: ; %bb.0:
+; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX12-FAKE16-SDAG: ; %bb.0:
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX12-FAKE16-GISEL: ; %bb.0:
+; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
+; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%fabs.y = call half @llvm.fabs.f16(half %y)
%result = call half @llvm.maximumnum.f16(half %x, half %fabs.y)
ret half %result
}
define half @v_maximumnum_f16_fneg_fabs_rhs(half %x, half %y) {
-; GFX7-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e64 v1, -|v1|
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-TRUE16-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, -|v1.l|, -|v1.l|
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-TRUE16-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l|
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
-; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-FAKE16-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1|
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
-; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -|v1|
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, -|v1|
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, -|v1.l|, -|v1.l|
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, -|v1.l|, -|v1.l|
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX12-TRUE16-SDAG: ; %bb.0:
+; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l|
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX12-TRUE16-GISEL: ; %bb.0:
+; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l|
+; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX12-FAKE16-SDAG: ; %bb.0:
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1|
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX12-FAKE16-GISEL: ; %bb.0:
+; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1|
+; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%fabs.y = call half @llvm.fabs.f16(half %y)
%fneg.fabs.y = fneg half %fabs.y
%result = call half @llvm.maximumnum.f16(half %x, half %fneg.fabs.y)
@@ -1779,83 +2934,160 @@ define half @v_maximumnum_f16_fneg_fabs_rhs(half %x, half %y) {
}
define half @v_maximumnum_f16_fabs(half %x, half %y) {
-; GFX7-LABEL: v_maximumnum_f16_fabs:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e64 v0, |v0|
-; GFX7-NEXT: v_cvt_f32_f16_e64 v1, |v1|
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f16_fabs:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX8-NEXT: v_max_f16_e64 v0, |v0|, |v0|
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_f16_fabs:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-NEXT: v_max_f16_e64 v0, |v0|, |v0|
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f16_fabs:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX10-NEXT: v_max_f16_e64 v0, |v0|, |v0|
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-TRUE16-LABEL: v_maximumnum_f16_fabs:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l|
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, |v0.l|, |v0.l|
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_maximumnum_f16_fabs:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v0, |v0|, |v0|
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-TRUE16-LABEL: v_maximumnum_f16_fabs:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l|
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
-; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-FAKE16-LABEL: v_maximumnum_f16_fabs:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v0, |v0|, |v0|
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
-; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f16_fabs:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1|
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f16_fabs:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0|
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, |v1|
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f16_fabs:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX8-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f16_fabs:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX8-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_f16_fabs:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX9-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_f16_fabs:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f16_fabs:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX10-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f16_fabs:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX10-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16_fabs:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.l, |v0.l|, |v0.l|
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16_fabs:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.l, |v0.l|, |v0.l|
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16_fabs:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16_fabs:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_fabs:
+; GFX12-TRUE16-SDAG: ; %bb.0:
+; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l|
+; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16_fabs:
+; GFX12-TRUE16-GISEL: ; %bb.0:
+; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l|
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16_fabs:
+; GFX12-FAKE16-SDAG: ; %bb.0:
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, |v0|, |v0|
+; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16_fabs:
+; GFX12-FAKE16-GISEL: ; %bb.0:
+; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, |v0|, |v0|
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
+; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call half @llvm.fabs.f16(half %x)
%fabs.y = call half @llvm.fabs.f16(half %y)
%result = call half @llvm.maximumnum.f16(half %fabs.x, half %fabs.y)
@@ -1863,83 +3095,160 @@ define half @v_maximumnum_f16_fabs(half %x, half %y) {
}
define half @v_maximumnum_f16_fneg(half %x, half %y) {
-; GFX7-LABEL: v_maximumnum_f16_fneg:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -v0
-; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f16_fneg:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX8-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_f16_fneg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX9-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f16_fneg:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX10-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-TRUE16-LABEL: v_maximumnum_f16_fneg:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, -v1.l, -v1.l
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_maximumnum_f16_fneg:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-TRUE16-LABEL: v_maximumnum_f16_fneg:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
-; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-FAKE16-LABEL: v_maximumnum_f16_fneg:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, -v1, -v1
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v0, -v0, -v0
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
-; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f16_fneg:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e64 v1, -v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f16_fneg:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, -v1
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f16_fneg:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX8-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f16_fneg:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX8-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_f16_fneg:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX9-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_f16_fneg:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f16_fneg:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX10-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f16_fneg:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX10-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16_fneg:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, -v1.l, -v1.l
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16_fneg:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, -v1.l, -v1.l
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16_fneg:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16_fneg:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_fneg:
+; GFX12-TRUE16-SDAG: ; %bb.0:
+; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l
+; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16_fneg:
+; GFX12-TRUE16-GISEL: ; %bb.0:
+; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l
+; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16_fneg:
+; GFX12-FAKE16-SDAG: ; %bb.0:
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, -v1, -v1
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, -v0, -v0
+; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16_fneg:
+; GFX12-FAKE16-GISEL: ; %bb.0:
+; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, -v0, -v0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, -v1, -v1
+; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%fneg.x = fneg half %x
%fneg.y = fneg half %y
%result = call half @llvm.maximumnum.f16(half %fneg.x, half %fneg.y)
@@ -1947,166 +3256,313 @@ define half @v_maximumnum_f16_fneg(half %x, half %y) {
}
define double @v_maximumnum_f64_fneg(double %x, double %y) {
-; GFX7-LABEL: v_maximumnum_f64_fneg:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
-; GFX7-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f64_fneg:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
-; GFX8-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_f64_fneg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
-; GFX9-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f64_fneg:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
-; GFX10-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
-; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_f64_fneg:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
-; GFX11-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_f64_fneg:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f64_e64 v[2:3], -v[2:3], -v[2:3]
-; GFX12-NEXT: v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f64_fneg:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f64_fneg:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f64_fneg:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f64_fneg:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_f64_fneg:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX9-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_f64_fneg:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX9-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f64_fneg:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f64_fneg:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX10-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_f64_fneg:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_f64_fneg:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX11-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_f64_fneg:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[2:3], -v[2:3], -v[2:3]
+; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1]
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_f64_fneg:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1]
+; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[2:3], -v[2:3], -v[2:3]
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%fneg.x = fneg double %x
%fneg.y = fneg double %y
%result = call double @llvm.maximumnum.f64(double %fneg.x, double %fneg.y)
ret double %result
-}
-
-define <2 x half> @v_maximumnum_v2f16(<2 x half> %x, <2 x half> %y) {
-; GFX7-LABEL: v_maximumnum_v2f16:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v2f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_max_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_v2f16:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_v2f16:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_v2f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_v2f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_v2f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+}
+
+define <2 x half> @v_maximumnum_v2f16(<2 x half> %x, <2 x half> %y) {
+; GFX7-SDAG-LABEL: v_maximumnum_v2f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v2f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v2f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v2f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v3, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v2, v3
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_v2f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v2f16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v2f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v2f16:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-GISEL-NEXT: s_nop 0
+; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_v2f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_v2f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_v2f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_v2f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_v2f16:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v1
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_v2f16:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> %x, <2 x half> %y)
ret <2 x half> %result
}
define <2 x half> @v_maximumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) {
-; GFX7-LABEL: v_maximumnum_v2f16_nnan:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v2f16_nnan:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_v2f16_nnan:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v2f16_nnan:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v2f16_nnan:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v2f16_nnan:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v0, v1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_v2f16_nnan:
; GFX9: ; %bb.0:
@@ -2140,135 +3596,243 @@ define <2 x half> @v_maximumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) {
}
define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) {
-; GFX7-LABEL: v_maximumnum_v3f16:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v3f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v4, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v5, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_max_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v2
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v3
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_v3f16:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX900-NEXT: v_pk_max_f16 v2, v3, v3
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v2
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_v3f16:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX950-NEXT: v_pk_max_f16 v2, v3, v3
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v2
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_v3f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_v3f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_v3f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
-; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v2
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v3
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_v3f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v3f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v5
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v3f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v3
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v3f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v5, v2, v2
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v3, v3, v3
+; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v4, v5
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v3
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_v3f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v3, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_v3f16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v3f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v3, v3
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_v3f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_v3f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_v3f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_v3f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_v3f16:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v2
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v3
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_v3f16:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v2
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v3
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <3 x half> @llvm.maximumnum.v3f16(<3 x half> %x, <3 x half> %y)
ret <3 x half> %result
}
define <3 x half> @v_maximumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
-; GFX7-LABEL: v_maximumnum_v3f16_nnan:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v3f16_nnan:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v2
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v3
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_v3f16_nnan:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v3f16_nnan:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v5
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v3f16_nnan:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v3
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v3f16_nnan:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v2
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v3
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_v3f16_nnan:
; GFX9: ; %bb.0:
@@ -2306,151 +3870,273 @@ define <3 x half> @v_maximumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
}
define <4 x half> @v_maximumnum_v4f16(<4 x half> %x, <4 x half> %y) {
-; GFX7-LABEL: v_maximumnum_v4f16:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
-; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
-; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v4f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v4, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v5, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v5, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v6, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_max_f16_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v3
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v2
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_v4f16:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX900-NEXT: v_pk_max_f16 v2, v3, v3
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v2
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_v4f16:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX950-NEXT: v_pk_max_f16 v2, v3, v3
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v2
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_v4f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_v4f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_v4f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
-; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v2
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v3
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_v4f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX7-SDAG-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v4f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v6
+; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v7
+; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: v_max_f32_e32 v3, v3, v5
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v4f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v6, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v3
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v5
+; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v4
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v4f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v5, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v6, v2, v2
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v7, v3, v3
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v4, v6
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v5, v7
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_v4f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v3, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_v4f16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v4f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v3, v3
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_v4f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_v4f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_v4f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_v4f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_v4f16:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v2
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v3
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_v4f16:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v2
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v3
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> %x, <4 x half> %y)
ret <4 x half> %result
}
define <4 x half> @v_maximumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) {
-; GFX7-LABEL: v_maximumnum_v4f16_nnan:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
-; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
-; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v4f16_nnan:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v3
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v2
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_v4f16_nnan:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX7-SDAG-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v4f16_nnan:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v6
+; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v7
+; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: v_max_f32_e32 v3, v3, v5
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v4f16_nnan:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v3
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v5
+; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v4
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v4f16_nnan:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v2
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v1, v3
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_v4f16_nnan:
; GFX9: ; %bb.0:
@@ -2488,1349 +4174,2545 @@ define <4 x half> @v_maximumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) {
}
define <6 x half> @v_maximumnum_v6f16(<6 x half> %x, <6 x half> %y) {
-; GFX7-LABEL: v_maximumnum_v6f16:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
-; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
-; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v6
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v7
-; GFX7-NEXT: v_max_f32_e32 v2, v2, v8
-; GFX7-NEXT: v_max_f32_e32 v3, v3, v9
-; GFX7-NEXT: v_max_f32_e32 v4, v4, v10
-; GFX7-NEXT: v_max_f32_e32 v5, v5, v11
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v6f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v6, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v7, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v6, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v7, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v8, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v7, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v8, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v9, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v5, v5, v5
-; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
-; GFX8-NEXT: v_max_f16_e32 v4, v4, v4
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_max_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_e32 v2, v2, v5
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v4
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v3
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v8
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v7
-; GFX8-NEXT: v_or_b32_e32 v2, v2, v6
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_v6f16:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v3
-; GFX900-NEXT: v_pk_max_f16 v3, v4, v4
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX900-NEXT: v_pk_max_f16 v3, v5, v5
-; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX900-NEXT: v_pk_max_f16 v2, v2, v3
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_v6f16:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v3
-; GFX950-NEXT: v_pk_max_f16 v3, v4, v4
-; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX950-NEXT: v_pk_max_f16 v3, v5, v5
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_pk_max_f16 v2, v2, v3
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_v6f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX10-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX10-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v3
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v4
-; GFX10-NEXT: v_pk_max_f16 v2, v2, v5
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_v6f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX11-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX11-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v4
-; GFX11-NEXT: v_pk_max_f16 v2, v2, v5
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_v6f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
-; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
-; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5
-; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v3
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v4
-; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v5
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_v6f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v6
+; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v7
+; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v8
+; GFX7-SDAG-NEXT: v_max_f32_e32 v3, v3, v9
+; GFX7-SDAG-NEXT: v_max_f32_e32 v4, v4, v10
+; GFX7-SDAG-NEXT: v_max_f32_e32 v5, v5, v11
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v6f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v6
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v8
+; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v7
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v9
+; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v10
+; GFX7-GISEL-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v11
+; GFX7-GISEL-NEXT: v_max_f32_e32 v4, v4, v6
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: v_max_f32_e32 v5, v5, v7
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v6f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v6, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v7, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v6, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v7, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v7, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v9, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5
+; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2
+; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v5
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v4
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v3
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v8
+; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v7
+; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v6
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v6f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v6, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v7, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v8, v2, v2
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v3, v3
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v10, v4, v4
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v11, v5, v5
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v6, v6, v9
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v3, v7, v10
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v8, v11
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v6, v0
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v4, v2
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_v6f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v4, v4
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v5, v5
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v3
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_v6f16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX9-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX9-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v3
+; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v4
+; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v5
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v6f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v4, v4
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v5, v5
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v3
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_v6f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v3
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v4
+; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v5
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_v6f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v3
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v4
+; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v5
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_v6f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v3
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v4
+; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v5
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_v6f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v3
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v4
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v5
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_v6f16:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v3
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v4
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v5
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_v6f16:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v3
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v4
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v5
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <6 x half> @llvm.maximumnum.v6f16(<6 x half> %x, <6 x half> %y)
ret <6 x half> %result
}
define <8 x half> @v_maximumnum_v8f16(<8 x half> %x, <8 x half> %y) {
-; GFX7-LABEL: v_maximumnum_v8f16:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
-; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
-; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
-; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
-; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
-; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v8
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v9
-; GFX7-NEXT: v_max_f32_e32 v2, v2, v10
-; GFX7-NEXT: v_max_f32_e32 v3, v3, v11
-; GFX7-NEXT: v_max_f32_e32 v4, v4, v12
-; GFX7-NEXT: v_max_f32_e32 v5, v5, v13
-; GFX7-NEXT: v_max_f32_e32 v6, v6, v14
-; GFX7-NEXT: v_max_f32_e32 v7, v7, v15
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v8f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v8, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v9, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v9, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v10, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v10, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v11, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v10, v11, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v11, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v12, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v7, v7, v7
-; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
-; GFX8-NEXT: v_max_f16_e32 v6, v6, v6
-; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
-; GFX8-NEXT: v_max_f16_e32 v5, v5, v5
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_e32 v4, v4, v4
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_max_f16_sdwa v11, v12, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_e32 v3, v3, v7
-; GFX8-NEXT: v_max_f16_e32 v2, v2, v6
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v5
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v4
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v11
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v10
-; GFX8-NEXT: v_or_b32_e32 v2, v2, v9
-; GFX8-NEXT: v_or_b32_e32 v3, v3, v8
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_v8f16:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v4
-; GFX900-NEXT: v_pk_max_f16 v4, v5, v5
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v4
-; GFX900-NEXT: v_pk_max_f16 v4, v6, v6
-; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX900-NEXT: v_pk_max_f16 v2, v2, v4
-; GFX900-NEXT: v_pk_max_f16 v4, v7, v7
-; GFX900-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX900-NEXT: v_pk_max_f16 v3, v3, v4
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_v8f16:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v4
-; GFX950-NEXT: v_pk_max_f16 v4, v5, v5
-; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v4
-; GFX950-NEXT: v_pk_max_f16 v4, v6, v6
-; GFX950-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX950-NEXT: v_pk_max_f16 v2, v2, v4
-; GFX950-NEXT: v_pk_max_f16 v4, v7, v7
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_pk_max_f16 v3, v3, v4
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_v8f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX10-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX10-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX10-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v4
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v5
-; GFX10-NEXT: v_pk_max_f16 v2, v2, v6
-; GFX10-NEXT: v_pk_max_f16 v3, v3, v7
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_v8f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX11-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX11-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX11-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v4
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v5
-; GFX11-NEXT: v_pk_max_f16 v2, v2, v6
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_pk_max_f16 v3, v3, v7
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_v8f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
-; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
-; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v6
-; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
-; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v7
-; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v4
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v5
-; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v6
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v7
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_v8f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v15, v15
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v14, v14
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v13, v13
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v12, v12
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v15, v15
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v14, v14
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v13, v13
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v12, v12
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v8
+; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v9
+; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v10
+; GFX7-SDAG-NEXT: v_max_f32_e32 v3, v3, v11
+; GFX7-SDAG-NEXT: v_max_f32_e32 v4, v4, v12
+; GFX7-SDAG-NEXT: v_max_f32_e32 v5, v5, v13
+; GFX7-SDAG-NEXT: v_max_f32_e32 v6, v6, v14
+; GFX7-SDAG-NEXT: v_max_f32_e32 v7, v7, v15
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v8f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v8
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v10
+; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v9
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v11
+; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v8
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v12
+; GFX7-GISEL-NEXT: v_max_f32_e32 v3, v3, v9
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v13
+; GFX7-GISEL-NEXT: v_max_f32_e32 v4, v4, v8
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v14
+; GFX7-GISEL-NEXT: v_max_f32_e32 v5, v5, v9
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v15
+; GFX7-GISEL-NEXT: v_max_f32_e32 v6, v6, v8
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: v_max_f32_e32 v7, v7, v9
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v8f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v9, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v9, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v10, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v10, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v11, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v10, v11, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v11, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v12, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v7
+; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3
+; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v6
+; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2
+; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v11, v12, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v7
+; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v6
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v5
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v4
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v11
+; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v10
+; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v9
+; GFX8-SDAG-NEXT: v_or_b32_e32 v3, v3, v8
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v8f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v8, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v10, v2, v2
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v11, v3, v3
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v12, v4, v4
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v13, v5, v5
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v14, v6, v6
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v15, v7, v7
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v8, v8, v12
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v9, v13
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v5, v10, v14
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v6, v11, v15
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v8, v0
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v4, v1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v5, v2
+; GFX8-GISEL-NEXT: v_or_b32_e32 v3, v6, v3
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_v8f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v4
+; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v5, v5
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v4
+; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v6, v6
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v4
+; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v7, v7
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v4
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_v8f16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX9-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX9-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX9-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX9-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v4
+; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v5
+; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v6
+; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v7
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v8f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v4
+; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v5, v5
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v4
+; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v6, v6
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v4
+; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v7, v7
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v4
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_v8f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v4
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v5
+; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v6
+; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v7
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_v8f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX10-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v4
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v5
+; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v6
+; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v7
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_v8f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v4
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v5
+; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v6
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v7
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_v8f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v4
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v5
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v6
+; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v7
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_v8f16:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v4
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v5
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v6
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v7
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_v8f16:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v4
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v5
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v6
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v7
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> %x, <8 x half> %y)
ret <8 x half> %result
}
-define <16 x half> @v_maximumnum_v16f16(<16 x half> %x, <16 x half> %y) {
-; GFX7-LABEL: v_maximumnum_v16f16:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v16
-; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v17
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v20
-; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v16
-; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v18
-; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v21
-; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_max_f32_e32 v2, v2, v16
-; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v19
-; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v22
-; GFX7-NEXT: v_cvt_f16_f32_e32 v20, v23
-; GFX7-NEXT: v_max_f32_e32 v4, v4, v17
-; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19
-; GFX7-NEXT: v_max_f32_e32 v5, v5, v18
-; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
-; GFX7-NEXT: v_max_f32_e32 v3, v3, v16
-; GFX7-NEXT: buffer_load_dword v16, off, s[0:3], s32
-; GFX7-NEXT: v_max_f32_e32 v6, v6, v19
-; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v24
-; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
-; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v25
-; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
-; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v26
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_cvt_f32_f16_e32 v20, v20
-; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
-; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
-; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
-; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18
-; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
-; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19
-; GFX7-NEXT: v_max_f32_e32 v7, v7, v20
-; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
-; GFX7-NEXT: v_cvt_f16_f32_e32 v20, v27
-; GFX7-NEXT: v_max_f32_e32 v8, v8, v17
-; GFX7-NEXT: v_max_f32_e32 v9, v9, v18
-; GFX7-NEXT: v_max_f32_e32 v10, v10, v19
-; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12
-; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v28
-; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13
-; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v29
-; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14
-; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v30
-; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15
-; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
-; GFX7-NEXT: v_cvt_f32_f16_e32 v20, v20
-; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12
-; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
-; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13
-; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18
-; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14
-; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19
-; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15
-; GFX7-NEXT: v_max_f32_e32 v11, v11, v20
-; GFX7-NEXT: v_max_f32_e32 v12, v12, v17
-; GFX7-NEXT: v_max_f32_e32 v13, v13, v18
-; GFX7-NEXT: v_max_f32_e32 v14, v14, v19
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16
-; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT: v_max_f32_e32 v15, v15, v16
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v16f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v16, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v17, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v16, v17, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v17, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v18, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v17, v18, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v18, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v19, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v18, v19, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v19, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v20, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v19, v20, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v20, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v21, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v20, v21, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v21, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v22, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v21, v22, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v22, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v23, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v22, v23, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v23, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v24, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v15, v15, v15
-; GFX8-NEXT: v_max_f16_e32 v7, v7, v7
-; GFX8-NEXT: v_max_f16_e32 v14, v14, v14
-; GFX8-NEXT: v_max_f16_e32 v6, v6, v6
-; GFX8-NEXT: v_max_f16_e32 v13, v13, v13
-; GFX8-NEXT: v_max_f16_e32 v5, v5, v5
-; GFX8-NEXT: v_max_f16_e32 v12, v12, v12
-; GFX8-NEXT: v_max_f16_e32 v4, v4, v4
-; GFX8-NEXT: v_max_f16_e32 v11, v11, v11
-; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
-; GFX8-NEXT: v_max_f16_e32 v10, v10, v10
-; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
-; GFX8-NEXT: v_max_f16_e32 v9, v9, v9
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_e32 v8, v8, v8
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_max_f16_sdwa v23, v24, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_e32 v7, v7, v15
-; GFX8-NEXT: v_max_f16_e32 v6, v6, v14
-; GFX8-NEXT: v_max_f16_e32 v5, v5, v13
-; GFX8-NEXT: v_max_f16_e32 v4, v4, v12
-; GFX8-NEXT: v_max_f16_e32 v3, v3, v11
-; GFX8-NEXT: v_max_f16_e32 v2, v2, v10
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v9
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v8
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v23
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v22
-; GFX8-NEXT: v_or_b32_e32 v2, v2, v21
-; GFX8-NEXT: v_or_b32_e32 v3, v3, v20
-; GFX8-NEXT: v_or_b32_e32 v4, v4, v19
-; GFX8-NEXT: v_or_b32_e32 v5, v5, v18
-; GFX8-NEXT: v_or_b32_e32 v6, v6, v17
-; GFX8-NEXT: v_or_b32_e32 v7, v7, v16
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_v16f16:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_pk_max_f16 v8, v8, v8
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v8
-; GFX900-NEXT: v_pk_max_f16 v8, v9, v9
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v8
-; GFX900-NEXT: v_pk_max_f16 v8, v10, v10
-; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX900-NEXT: v_pk_max_f16 v2, v2, v8
-; GFX900-NEXT: v_pk_max_f16 v8, v11, v11
-; GFX900-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX900-NEXT: v_pk_max_f16 v3, v3, v8
-; GFX900-NEXT: v_pk_max_f16 v8, v12, v12
-; GFX900-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX900-NEXT: v_pk_max_f16 v4, v4, v8
-; GFX900-NEXT: v_pk_max_f16 v8, v13, v13
-; GFX900-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX900-NEXT: v_pk_max_f16 v5, v5, v8
-; GFX900-NEXT: v_pk_max_f16 v8, v14, v14
-; GFX900-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX900-NEXT: v_pk_max_f16 v6, v6, v8
-; GFX900-NEXT: v_pk_max_f16 v8, v15, v15
-; GFX900-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX900-NEXT: v_pk_max_f16 v7, v7, v8
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_v16f16:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_pk_max_f16 v8, v8, v8
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v8
-; GFX950-NEXT: v_pk_max_f16 v8, v9, v9
-; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v8
-; GFX950-NEXT: v_pk_max_f16 v8, v10, v10
-; GFX950-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX950-NEXT: v_pk_max_f16 v2, v2, v8
-; GFX950-NEXT: v_pk_max_f16 v8, v11, v11
-; GFX950-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX950-NEXT: v_pk_max_f16 v3, v3, v8
-; GFX950-NEXT: v_pk_max_f16 v8, v12, v12
-; GFX950-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX950-NEXT: v_pk_max_f16 v4, v4, v8
-; GFX950-NEXT: v_pk_max_f16 v8, v13, v13
-; GFX950-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX950-NEXT: v_pk_max_f16 v5, v5, v8
-; GFX950-NEXT: v_pk_max_f16 v8, v14, v14
-; GFX950-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX950-NEXT: v_pk_max_f16 v6, v6, v8
-; GFX950-NEXT: v_pk_max_f16 v8, v15, v15
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_pk_max_f16 v7, v7, v8
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_v16f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_max_f16 v8, v8, v8
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX10-NEXT: v_pk_max_f16 v9, v9, v9
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX10-NEXT: v_pk_max_f16 v10, v10, v10
-; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v8
-; GFX10-NEXT: v_pk_max_f16 v8, v11, v11
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v9
-; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX10-NEXT: v_pk_max_f16 v2, v2, v10
-; GFX10-NEXT: v_pk_max_f16 v9, v12, v12
-; GFX10-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX10-NEXT: v_pk_max_f16 v10, v13, v13
-; GFX10-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX10-NEXT: v_pk_max_f16 v11, v14, v14
-; GFX10-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX10-NEXT: v_pk_max_f16 v12, v15, v15
-; GFX10-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX10-NEXT: v_pk_max_f16 v3, v3, v8
-; GFX10-NEXT: v_pk_max_f16 v4, v4, v9
-; GFX10-NEXT: v_pk_max_f16 v5, v5, v10
-; GFX10-NEXT: v_pk_max_f16 v6, v6, v11
-; GFX10-NEXT: v_pk_max_f16 v7, v7, v12
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_v16f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_max_f16 v8, v8, v8
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX11-NEXT: v_pk_max_f16 v9, v9, v9
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX11-NEXT: v_pk_max_f16 v10, v10, v10
-; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v8
-; GFX11-NEXT: v_pk_max_f16 v8, v11, v11
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v9
-; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX11-NEXT: v_pk_max_f16 v2, v2, v10
-; GFX11-NEXT: v_pk_max_f16 v9, v12, v12
-; GFX11-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX11-NEXT: v_pk_max_f16 v10, v13, v13
-; GFX11-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX11-NEXT: v_pk_max_f16 v11, v14, v14
-; GFX11-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX11-NEXT: v_pk_max_f16 v12, v15, v15
-; GFX11-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX11-NEXT: v_pk_max_f16 v3, v3, v8
-; GFX11-NEXT: v_pk_max_f16 v4, v4, v9
-; GFX11-NEXT: v_pk_max_f16 v5, v5, v10
-; GFX11-NEXT: v_pk_max_f16 v6, v6, v11
-; GFX11-NEXT: v_pk_max_f16 v7, v7, v12
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_v16f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_pk_max_num_f16 v8, v8, v8
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
-; GFX12-NEXT: v_pk_max_num_f16 v9, v9, v9
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
-; GFX12-NEXT: v_pk_max_num_f16 v10, v10, v10
-; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v8
-; GFX12-NEXT: v_pk_max_num_f16 v8, v11, v11
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v9
-; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
-; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v10
-; GFX12-NEXT: v_pk_max_num_f16 v9, v12, v12
-; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4
-; GFX12-NEXT: v_pk_max_num_f16 v10, v13, v13
-; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5
-; GFX12-NEXT: v_pk_max_num_f16 v11, v14, v14
-; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v6
-; GFX12-NEXT: v_pk_max_num_f16 v12, v15, v15
-; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v7
-; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v8
-; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v9
-; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v10
-; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v11
-; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v12
-; GFX12-NEXT: s_setpc_b64 s[30:31]
- %result = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> %x, <16 x half> %y)
- ret <16 x half> %result
-}
-
-define <32 x half> @v_maximumnum_v32f16(<32 x half> %x, <32 x half> %y) {
-; GFX7-LABEL: v_maximumnum_v32f16:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
-; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
-; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
-; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
-; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
-; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
-; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
-; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
-; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12
-; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13
-; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14
-; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15
-; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12
-; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13
-; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14
-; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15
-; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16
-; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v17
-; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v18
-; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v19
-; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
-; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18
-; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19
-; GFX7-NEXT: v_cvt_f16_f32_e32 v20, v20
-; GFX7-NEXT: v_cvt_f16_f32_e32 v21, v21
-; GFX7-NEXT: v_cvt_f16_f32_e32 v22, v22
-; GFX7-NEXT: v_cvt_f16_f32_e32 v23, v23
-; GFX7-NEXT: v_cvt_f32_f16_e32 v20, v20
-; GFX7-NEXT: v_cvt_f32_f16_e32 v21, v21
-; GFX7-NEXT: v_cvt_f32_f16_e32 v22, v22
-; GFX7-NEXT: v_cvt_f32_f16_e32 v23, v23
-; GFX7-NEXT: v_cvt_f16_f32_e32 v24, v24
-; GFX7-NEXT: v_cvt_f16_f32_e32 v25, v25
-; GFX7-NEXT: v_cvt_f16_f32_e32 v26, v26
-; GFX7-NEXT: v_cvt_f16_f32_e32 v27, v27
-; GFX7-NEXT: v_cvt_f32_f16_e32 v24, v24
-; GFX7-NEXT: v_cvt_f32_f16_e32 v25, v25
-; GFX7-NEXT: v_cvt_f32_f16_e32 v26, v26
-; GFX7-NEXT: v_cvt_f32_f16_e32 v27, v27
-; GFX7-NEXT: v_cvt_f16_f32_e32 v28, v28
-; GFX7-NEXT: v_cvt_f16_f32_e32 v29, v29
-; GFX7-NEXT: v_cvt_f16_f32_e32 v30, v30
-; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:128
-; GFX7-NEXT: v_cvt_f32_f16_e32 v28, v28
-; GFX7-NEXT: v_cvt_f32_f16_e32 v29, v29
-; GFX7-NEXT: v_cvt_f32_f16_e32 v30, v30
-; GFX7-NEXT: s_waitcnt vmcnt(1)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8
-; GFX7-NEXT: s_waitcnt vmcnt(1)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v32, v32
-; GFX7-NEXT: v_cvt_f32_f16_e32 v32, v32
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v2, v2, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:16
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v3, v3, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v4, v4, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v5, v5, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v6, v6, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:32
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v7, v7, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v8, v8, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:40
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v9, v9, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v10, v10, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:48
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v11, v11, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v12, v12, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:56
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v13, v13, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v14, v14, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:64
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v15, v15, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v16, v16, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:72
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v17, v17, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v18, v18, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:80
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v19, v19, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v20, v20, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:88
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v21, v21, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v22, v22, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:96
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v23, v23, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v24, v24, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:104
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v25, v25, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v26, v26, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:112
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v27, v27, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v28, v28, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:120
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v29, v29, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:124
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v30, v30, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_max_f32_e32 v31, v31, v32
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v32f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GFX8-NEXT: v_max_f16_sdwa v38, v27, v27 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v39, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v48, v26, v26 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v49, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v50, v25, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v51, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v40, v22, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v41, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v58, v17, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v59, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v17, v17, v17
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_sdwa v52, v24, v24 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v53, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v54, v23, v23 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v55, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v42, v21, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v43, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v44, v20, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v45, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v46, v19, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v47, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v56, v18, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v57, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v38, v39, v38 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v39, v49, v48 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v48, v51, v50 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v51, v41, v40 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v40, v59, v58 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v17
-; GFX8-NEXT: v_max_f16_sdwa v49, v53, v52 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v50, v55, v54 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v52, v43, v42 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v53, v45, v44 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v54, v47, v46 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v55, v57, v56 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v40
-; GFX8-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX8-NEXT: v_max_f16_sdwa v32, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v33, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v34, v29, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v35, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v36, v28, v28 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v37, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v32, v33, v32 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v33, v16, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v34, v35, v34 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v35, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v36, v37, v36 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v37, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v15, v15, v15
-; GFX8-NEXT: v_max_f16_sdwa v33, v35, v33 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_e32 v30, v30, v30
-; GFX8-NEXT: v_max_f16_e32 v14, v14, v14
-; GFX8-NEXT: v_max_f16_e32 v29, v29, v29
-; GFX8-NEXT: v_max_f16_e32 v13, v13, v13
-; GFX8-NEXT: v_max_f16_e32 v28, v28, v28
-; GFX8-NEXT: v_max_f16_e32 v12, v12, v12
-; GFX8-NEXT: v_max_f16_e32 v27, v27, v27
-; GFX8-NEXT: v_max_f16_e32 v11, v11, v11
-; GFX8-NEXT: v_max_f16_e32 v26, v26, v26
-; GFX8-NEXT: v_max_f16_e32 v10, v10, v10
-; GFX8-NEXT: v_max_f16_e32 v25, v25, v25
-; GFX8-NEXT: v_max_f16_e32 v9, v9, v9
-; GFX8-NEXT: v_max_f16_e32 v24, v24, v24
-; GFX8-NEXT: v_max_f16_e32 v8, v8, v8
-; GFX8-NEXT: v_max_f16_e32 v23, v23, v23
-; GFX8-NEXT: v_max_f16_e32 v7, v7, v7
-; GFX8-NEXT: v_max_f16_e32 v22, v22, v22
-; GFX8-NEXT: v_max_f16_e32 v6, v6, v6
-; GFX8-NEXT: v_max_f16_e32 v21, v21, v21
-; GFX8-NEXT: v_max_f16_e32 v5, v5, v5
-; GFX8-NEXT: v_max_f16_e32 v20, v20, v20
-; GFX8-NEXT: v_max_f16_e32 v4, v4, v4
-; GFX8-NEXT: v_max_f16_e32 v19, v19, v19
-; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
-; GFX8-NEXT: v_max_f16_e32 v18, v18, v18
-; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
-; GFX8-NEXT: v_max_f16_e32 v16, v16, v16
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_max_f16_e32 v14, v14, v30
-; GFX8-NEXT: v_max_f16_e32 v13, v13, v29
-; GFX8-NEXT: v_max_f16_e32 v12, v12, v28
-; GFX8-NEXT: v_max_f16_e32 v11, v11, v27
-; GFX8-NEXT: v_max_f16_e32 v10, v10, v26
-; GFX8-NEXT: v_max_f16_e32 v9, v9, v25
-; GFX8-NEXT: v_max_f16_e32 v8, v8, v24
-; GFX8-NEXT: v_max_f16_e32 v7, v7, v23
-; GFX8-NEXT: v_max_f16_e32 v6, v6, v22
-; GFX8-NEXT: v_max_f16_e32 v5, v5, v21
-; GFX8-NEXT: v_max_f16_e32 v4, v4, v20
-; GFX8-NEXT: v_max_f16_e32 v3, v3, v19
-; GFX8-NEXT: v_max_f16_e32 v2, v2, v18
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v16
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v33
-; GFX8-NEXT: v_or_b32_e32 v2, v2, v55
-; GFX8-NEXT: v_or_b32_e32 v3, v3, v54
-; GFX8-NEXT: v_or_b32_e32 v4, v4, v53
-; GFX8-NEXT: v_or_b32_e32 v5, v5, v52
-; GFX8-NEXT: v_or_b32_e32 v6, v6, v51
-; GFX8-NEXT: v_or_b32_e32 v7, v7, v50
-; GFX8-NEXT: v_or_b32_e32 v8, v8, v49
-; GFX8-NEXT: v_or_b32_e32 v9, v9, v48
-; GFX8-NEXT: v_or_b32_e32 v10, v10, v39
-; GFX8-NEXT: v_or_b32_e32 v11, v11, v38
-; GFX8-NEXT: v_or_b32_e32 v12, v12, v36
-; GFX8-NEXT: v_or_b32_e32 v13, v13, v34
-; GFX8-NEXT: v_or_b32_e32 v14, v14, v32
-; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v35, v31, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v31, v31, v31
-; GFX8-NEXT: v_max_f16_sdwa v35, v37, v35 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_e32 v15, v15, v31
-; GFX8-NEXT: v_or_b32_e32 v15, v15, v35
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_v32f16:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_pk_max_f16 v16, v16, v16
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v16
-; GFX900-NEXT: v_pk_max_f16 v16, v17, v17
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v16
-; GFX900-NEXT: v_pk_max_f16 v16, v18, v18
-; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX900-NEXT: v_pk_max_f16 v2, v2, v16
-; GFX900-NEXT: v_pk_max_f16 v16, v19, v19
-; GFX900-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX900-NEXT: v_pk_max_f16 v3, v3, v16
-; GFX900-NEXT: buffer_load_dword v16, off, s[0:3], s32
-; GFX900-NEXT: v_pk_max_f16 v17, v20, v20
-; GFX900-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX900-NEXT: v_pk_max_f16 v18, v21, v21
-; GFX900-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX900-NEXT: v_pk_max_f16 v19, v22, v22
-; GFX900-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX900-NEXT: v_pk_max_f16 v20, v23, v23
-; GFX900-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX900-NEXT: v_pk_max_f16 v21, v24, v24
-; GFX900-NEXT: v_pk_max_f16 v8, v8, v8
-; GFX900-NEXT: v_pk_max_f16 v22, v25, v25
-; GFX900-NEXT: v_pk_max_f16 v9, v9, v9
-; GFX900-NEXT: v_pk_max_f16 v23, v26, v26
-; GFX900-NEXT: v_pk_max_f16 v10, v10, v10
-; GFX900-NEXT: v_pk_max_f16 v24, v27, v27
-; GFX900-NEXT: v_pk_max_f16 v11, v11, v11
-; GFX900-NEXT: v_pk_max_f16 v25, v28, v28
-; GFX900-NEXT: v_pk_max_f16 v12, v12, v12
-; GFX900-NEXT: v_pk_max_f16 v26, v29, v29
-; GFX900-NEXT: v_pk_max_f16 v13, v13, v13
-; GFX900-NEXT: v_pk_max_f16 v27, v30, v30
-; GFX900-NEXT: v_pk_max_f16 v14, v14, v14
-; GFX900-NEXT: v_pk_max_f16 v15, v15, v15
-; GFX900-NEXT: v_pk_max_f16 v4, v4, v17
-; GFX900-NEXT: v_pk_max_f16 v5, v5, v18
-; GFX900-NEXT: v_pk_max_f16 v6, v6, v19
-; GFX900-NEXT: v_pk_max_f16 v7, v7, v20
-; GFX900-NEXT: v_pk_max_f16 v8, v8, v21
-; GFX900-NEXT: v_pk_max_f16 v9, v9, v22
-; GFX900-NEXT: v_pk_max_f16 v10, v10, v23
-; GFX900-NEXT: v_pk_max_f16 v11, v11, v24
-; GFX900-NEXT: v_pk_max_f16 v12, v12, v25
-; GFX900-NEXT: v_pk_max_f16 v13, v13, v26
-; GFX900-NEXT: v_pk_max_f16 v14, v14, v27
-; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: v_pk_max_f16 v16, v16, v16
-; GFX900-NEXT: v_pk_max_f16 v15, v15, v16
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_v32f16:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: scratch_load_dword v31, off, s32
-; GFX950-NEXT: v_pk_max_f16 v16, v16, v16
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX950-NEXT: v_pk_max_f16 v17, v17, v17
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX950-NEXT: v_pk_max_f16 v18, v18, v18
-; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX950-NEXT: v_pk_max_f16 v19, v19, v19
-; GFX950-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX950-NEXT: v_pk_max_f16 v20, v20, v20
-; GFX950-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX950-NEXT: v_pk_max_f16 v21, v21, v21
-; GFX950-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX950-NEXT: v_pk_max_f16 v22, v22, v22
-; GFX950-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX950-NEXT: v_pk_max_f16 v23, v23, v23
-; GFX950-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX950-NEXT: v_pk_max_f16 v24, v24, v24
-; GFX950-NEXT: v_pk_max_f16 v8, v8, v8
-; GFX950-NEXT: v_pk_max_f16 v25, v25, v25
-; GFX950-NEXT: v_pk_max_f16 v9, v9, v9
-; GFX950-NEXT: v_pk_max_f16 v26, v26, v26
-; GFX950-NEXT: v_pk_max_f16 v10, v10, v10
-; GFX950-NEXT: v_pk_max_f16 v27, v27, v27
-; GFX950-NEXT: v_pk_max_f16 v11, v11, v11
-; GFX950-NEXT: v_pk_max_f16 v28, v28, v28
-; GFX950-NEXT: v_pk_max_f16 v12, v12, v12
-; GFX950-NEXT: v_pk_max_f16 v29, v29, v29
-; GFX950-NEXT: v_pk_max_f16 v13, v13, v13
-; GFX950-NEXT: v_pk_max_f16 v30, v30, v30
-; GFX950-NEXT: v_pk_max_f16 v14, v14, v14
-; GFX950-NEXT: v_pk_max_f16 v15, v15, v15
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v16
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v17
-; GFX950-NEXT: v_pk_max_f16 v2, v2, v18
-; GFX950-NEXT: v_pk_max_f16 v3, v3, v19
-; GFX950-NEXT: v_pk_max_f16 v4, v4, v20
-; GFX950-NEXT: v_pk_max_f16 v5, v5, v21
-; GFX950-NEXT: v_pk_max_f16 v6, v6, v22
-; GFX950-NEXT: v_pk_max_f16 v7, v7, v23
-; GFX950-NEXT: v_pk_max_f16 v8, v8, v24
-; GFX950-NEXT: v_pk_max_f16 v9, v9, v25
-; GFX950-NEXT: v_pk_max_f16 v10, v10, v26
-; GFX950-NEXT: v_pk_max_f16 v11, v11, v27
-; GFX950-NEXT: v_pk_max_f16 v12, v12, v28
-; GFX950-NEXT: v_pk_max_f16 v13, v13, v29
-; GFX950-NEXT: v_pk_max_f16 v14, v14, v30
-; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_pk_max_f16 v16, v31, v31
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_pk_max_f16 v15, v15, v16
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_v32f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT: v_pk_max_f16 v16, v16, v16
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX10-NEXT: v_pk_max_f16 v17, v17, v17
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX10-NEXT: v_pk_max_f16 v18, v18, v18
-; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX10-NEXT: v_pk_max_f16 v19, v19, v19
-; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX10-NEXT: v_pk_max_f16 v20, v20, v20
-; GFX10-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX10-NEXT: v_pk_max_f16 v21, v21, v21
-; GFX10-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX10-NEXT: v_pk_max_f16 v22, v22, v22
-; GFX10-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX10-NEXT: v_pk_max_f16 v23, v23, v23
-; GFX10-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX10-NEXT: v_pk_max_f16 v24, v24, v24
-; GFX10-NEXT: v_pk_max_f16 v8, v8, v8
-; GFX10-NEXT: v_pk_max_f16 v25, v25, v25
-; GFX10-NEXT: v_pk_max_f16 v9, v9, v9
-; GFX10-NEXT: v_pk_max_f16 v26, v26, v26
-; GFX10-NEXT: v_pk_max_f16 v10, v10, v10
-; GFX10-NEXT: v_pk_max_f16 v27, v27, v27
-; GFX10-NEXT: v_pk_max_f16 v11, v11, v11
-; GFX10-NEXT: v_pk_max_f16 v28, v28, v28
-; GFX10-NEXT: v_pk_max_f16 v12, v12, v12
-; GFX10-NEXT: v_pk_max_f16 v29, v29, v29
-; GFX10-NEXT: v_pk_max_f16 v13, v13, v13
-; GFX10-NEXT: v_pk_max_f16 v30, v30, v30
-; GFX10-NEXT: v_pk_max_f16 v14, v14, v14
-; GFX10-NEXT: v_pk_max_f16 v15, v15, v15
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v16
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v17
-; GFX10-NEXT: v_pk_max_f16 v2, v2, v18
-; GFX10-NEXT: v_pk_max_f16 v3, v3, v19
-; GFX10-NEXT: v_pk_max_f16 v4, v4, v20
-; GFX10-NEXT: v_pk_max_f16 v5, v5, v21
-; GFX10-NEXT: v_pk_max_f16 v6, v6, v22
-; GFX10-NEXT: v_pk_max_f16 v7, v7, v23
-; GFX10-NEXT: v_pk_max_f16 v8, v8, v24
-; GFX10-NEXT: v_pk_max_f16 v9, v9, v25
-; GFX10-NEXT: v_pk_max_f16 v10, v10, v26
-; GFX10-NEXT: v_pk_max_f16 v11, v11, v27
-; GFX10-NEXT: v_pk_max_f16 v12, v12, v28
-; GFX10-NEXT: v_pk_max_f16 v13, v13, v29
-; GFX10-NEXT: v_pk_max_f16 v14, v14, v30
-; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_max_f16 v16, v31, v31
-; GFX10-NEXT: v_pk_max_f16 v15, v15, v16
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_v32f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: scratch_load_b32 v31, off, s32
-; GFX11-NEXT: v_pk_max_f16 v16, v16, v16
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX11-NEXT: v_pk_max_f16 v17, v17, v17
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX11-NEXT: v_pk_max_f16 v18, v18, v18
-; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX11-NEXT: v_pk_max_f16 v19, v19, v19
-; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX11-NEXT: v_pk_max_f16 v20, v20, v20
-; GFX11-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX11-NEXT: v_pk_max_f16 v21, v21, v21
-; GFX11-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX11-NEXT: v_pk_max_f16 v22, v22, v22
-; GFX11-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX11-NEXT: v_pk_max_f16 v23, v23, v23
-; GFX11-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX11-NEXT: v_pk_max_f16 v24, v24, v24
-; GFX11-NEXT: v_pk_max_f16 v8, v8, v8
-; GFX11-NEXT: v_pk_max_f16 v25, v25, v25
-; GFX11-NEXT: v_pk_max_f16 v9, v9, v9
-; GFX11-NEXT: v_pk_max_f16 v26, v26, v26
-; GFX11-NEXT: v_pk_max_f16 v10, v10, v10
-; GFX11-NEXT: v_pk_max_f16 v27, v27, v27
-; GFX11-NEXT: v_pk_max_f16 v11, v11, v11
-; GFX11-NEXT: v_pk_max_f16 v28, v28, v28
-; GFX11-NEXT: v_pk_max_f16 v12, v12, v12
-; GFX11-NEXT: v_pk_max_f16 v29, v29, v29
-; GFX11-NEXT: v_pk_max_f16 v13, v13, v13
-; GFX11-NEXT: v_pk_max_f16 v30, v30, v30
-; GFX11-NEXT: v_pk_max_f16 v14, v14, v14
-; GFX11-NEXT: v_pk_max_f16 v15, v15, v15
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v16
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v17
-; GFX11-NEXT: v_pk_max_f16 v2, v2, v18
-; GFX11-NEXT: v_pk_max_f16 v3, v3, v19
-; GFX11-NEXT: v_pk_max_f16 v4, v4, v20
-; GFX11-NEXT: v_pk_max_f16 v5, v5, v21
-; GFX11-NEXT: v_pk_max_f16 v6, v6, v22
-; GFX11-NEXT: v_pk_max_f16 v7, v7, v23
-; GFX11-NEXT: v_pk_max_f16 v8, v8, v24
-; GFX11-NEXT: v_pk_max_f16 v9, v9, v25
-; GFX11-NEXT: v_pk_max_f16 v10, v10, v26
-; GFX11-NEXT: v_pk_max_f16 v11, v11, v27
-; GFX11-NEXT: v_pk_max_f16 v12, v12, v28
-; GFX11-NEXT: v_pk_max_f16 v13, v13, v29
-; GFX11-NEXT: v_pk_max_f16 v14, v14, v30
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_pk_max_f16 v16, v31, v31
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_pk_max_f16 v15, v15, v16
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_v32f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: scratch_load_b32 v31, off, s32
-; GFX12-NEXT: v_pk_max_num_f16 v16, v16, v16
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
-; GFX12-NEXT: v_pk_max_num_f16 v17, v17, v17
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
-; GFX12-NEXT: v_pk_max_num_f16 v18, v18, v18
-; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
-; GFX12-NEXT: v_pk_max_num_f16 v19, v19, v19
-; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
-; GFX12-NEXT: v_pk_max_num_f16 v20, v20, v20
-; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4
-; GFX12-NEXT: v_pk_max_num_f16 v21, v21, v21
-; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5
-; GFX12-NEXT: v_pk_max_num_f16 v22, v22, v22
-; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v6
-; GFX12-NEXT: v_pk_max_num_f16 v23, v23, v23
-; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v7
-; GFX12-NEXT: v_pk_max_num_f16 v24, v24, v24
-; GFX12-NEXT: v_pk_max_num_f16 v8, v8, v8
-; GFX12-NEXT: v_pk_max_num_f16 v25, v25, v25
-; GFX12-NEXT: v_pk_max_num_f16 v9, v9, v9
-; GFX12-NEXT: v_pk_max_num_f16 v26, v26, v26
-; GFX12-NEXT: v_pk_max_num_f16 v10, v10, v10
-; GFX12-NEXT: v_pk_max_num_f16 v27, v27, v27
-; GFX12-NEXT: v_pk_max_num_f16 v11, v11, v11
-; GFX12-NEXT: v_pk_max_num_f16 v28, v28, v28
-; GFX12-NEXT: v_pk_max_num_f16 v12, v12, v12
-; GFX12-NEXT: v_pk_max_num_f16 v29, v29, v29
-; GFX12-NEXT: v_pk_max_num_f16 v13, v13, v13
-; GFX12-NEXT: v_pk_max_num_f16 v30, v30, v30
-; GFX12-NEXT: v_pk_max_num_f16 v14, v14, v14
-; GFX12-NEXT: v_pk_max_num_f16 v15, v15, v15
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v16
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v17
-; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v18
-; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v19
-; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v20
-; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v21
-; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v22
-; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v23
-; GFX12-NEXT: v_pk_max_num_f16 v8, v8, v24
-; GFX12-NEXT: v_pk_max_num_f16 v9, v9, v25
-; GFX12-NEXT: v_pk_max_num_f16 v10, v10, v26
-; GFX12-NEXT: v_pk_max_num_f16 v11, v11, v27
-; GFX12-NEXT: v_pk_max_num_f16 v12, v12, v28
-; GFX12-NEXT: v_pk_max_num_f16 v13, v13, v29
-; GFX12-NEXT: v_pk_max_num_f16 v14, v14, v30
-; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: v_pk_max_num_f16 v16, v31, v31
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_pk_max_num_f16 v15, v15, v16
-; GFX12-NEXT: s_setpc_b64 s[30:31]
- %result = call <32 x half> @llvm.maximumnum.v32f16(<32 x half> %x, <32 x half> %y)
- ret <32 x half> %result
-}
-
-define <2 x float> @v_maximumnum_v2f32(<2 x float> %x, <2 x float> %y) {
-; GFX7-LABEL: v_maximumnum_v2f32:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v3
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v2
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v2f32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v3
-; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX8-NEXT: v_max_f32_e32 v1, v1, v2
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_v2f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_max_f32_e32 v2, v3, v3
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_v2f32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX10-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_v2f32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
-; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_v2f32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
-; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+define <16 x half> @v_maximumnum_v16f16(<16 x half> %x, <16 x half> %y) {
+; GFX7-SDAG-LABEL: v_maximumnum_v16f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v16
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v16
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v17
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v20
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v16
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v18
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v21
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v16
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v19
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v22
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v20, v23
+; GFX7-SDAG-NEXT: v_max_f32_e32 v4, v4, v17
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19
+; GFX7-SDAG-NEXT: v_max_f32_e32 v5, v5, v18
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8
+; GFX7-SDAG-NEXT: v_max_f32_e32 v3, v3, v16
+; GFX7-SDAG-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX7-SDAG-NEXT: v_max_f32_e32 v6, v6, v19
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v24
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v25
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v26
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v20, v20
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19
+; GFX7-SDAG-NEXT: v_max_f32_e32 v7, v7, v20
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v20, v27
+; GFX7-SDAG-NEXT: v_max_f32_e32 v8, v8, v17
+; GFX7-SDAG-NEXT: v_max_f32_e32 v9, v9, v18
+; GFX7-SDAG-NEXT: v_max_f32_e32 v10, v10, v19
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v12, v12
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v28
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v13, v13
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v29
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v14, v14
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v30
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v15, v15
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v20, v20
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v12, v12
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v13, v13
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v14, v14
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v15, v15
+; GFX7-SDAG-NEXT: v_max_f32_e32 v11, v11, v20
+; GFX7-SDAG-NEXT: v_max_f32_e32 v12, v12, v17
+; GFX7-SDAG-NEXT: v_max_f32_e32 v13, v13, v18
+; GFX7-SDAG-NEXT: v_max_f32_e32 v14, v14, v19
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v16
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16
+; GFX7-SDAG-NEXT: v_max_f32_e32 v15, v15, v16
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v16f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v16
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v16
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v17
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v20
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v16
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v18
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v21
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-GISEL-NEXT: v_max_f32_e32 v4, v4, v17
+; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v16
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v19
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v22
+; GFX7-GISEL-NEXT: v_max_f32_e32 v5, v5, v18
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-GISEL-NEXT: v_max_f32_e32 v3, v3, v16
+; GFX7-GISEL-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX7-GISEL-NEXT: v_max_f32_e32 v6, v6, v19
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v23
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v24
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v25
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v20, v26
+; GFX7-GISEL-NEXT: v_max_f32_e32 v7, v7, v17
+; GFX7-GISEL-NEXT: v_max_f32_e32 v8, v8, v18
+; GFX7-GISEL-NEXT: v_max_f32_e32 v9, v9, v19
+; GFX7-GISEL-NEXT: v_max_f32_e32 v10, v10, v20
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v27
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v12, v12
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v28
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v13, v13
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v29
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v14, v14
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v20, v30
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v15, v15
+; GFX7-GISEL-NEXT: v_max_f32_e32 v11, v11, v17
+; GFX7-GISEL-NEXT: v_max_f32_e32 v12, v12, v18
+; GFX7-GISEL-NEXT: v_max_f32_e32 v13, v13, v19
+; GFX7-GISEL-NEXT: v_max_f32_e32 v14, v14, v20
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v8, v8
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v9, v9
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v10, v10
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v11, v11
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v12, v12
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v13, v13
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v14, v14
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v16
+; GFX7-GISEL-NEXT: v_max_f32_e32 v15, v15, v16
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v15, v15
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v16f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v16, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v17, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v16, v17, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v17, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v18, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v17, v18, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v18, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v19, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v18, v19, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v19, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v20, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v19, v20, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v20, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v21, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v20, v21, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v21, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v22, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v21, v22, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v22, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v23, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v22, v23, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v23, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v24, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v15, v15, v15
+; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v7
+; GFX8-SDAG-NEXT: v_max_f16_e32 v14, v14, v14
+; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v6
+; GFX8-SDAG-NEXT: v_max_f16_e32 v13, v13, v13
+; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5
+; GFX8-SDAG-NEXT: v_max_f16_e32 v12, v12, v12
+; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4
+; GFX8-SDAG-NEXT: v_max_f16_e32 v11, v11, v11
+; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3
+; GFX8-SDAG-NEXT: v_max_f16_e32 v10, v10, v10
+; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2
+; GFX8-SDAG-NEXT: v_max_f16_e32 v9, v9, v9
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v8, v8, v8
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v23, v24, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v15
+; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v14
+; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v13
+; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v12
+; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v11
+; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v10
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v9
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v8
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v23
+; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v22
+; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v21
+; GFX8-SDAG-NEXT: v_or_b32_e32 v3, v3, v20
+; GFX8-SDAG-NEXT: v_or_b32_e32 v4, v4, v19
+; GFX8-SDAG-NEXT: v_or_b32_e32 v5, v5, v18
+; GFX8-SDAG-NEXT: v_or_b32_e32 v6, v6, v17
+; GFX8-SDAG-NEXT: v_or_b32_e32 v7, v7, v16
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v16f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v16, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v8, v8
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v8, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v8, v9, v9
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v9, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v2, v2
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v10, v10
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v10, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v16, v16, v19
+; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v3, v3
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v10, v11, v11
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v11, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v8, v17, v8
+; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v4, v4
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v11, v12, v12
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v12, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v18, v9
+; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v5, v5
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v12, v13, v13
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v13, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v10, v19, v10
+; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v6, v6
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v11, v17, v11
+; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v7, v7
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v12, v18, v12
+; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v14, v14
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v14, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v13, v15, v15
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v15, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v19, v18
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v13, v17, v13
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v16, v0
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v8, v1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v9, v2
+; GFX8-GISEL-NEXT: v_or_b32_e32 v3, v10, v3
+; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v11, v4
+; GFX8-GISEL-NEXT: v_or_b32_e32 v5, v12, v5
+; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v18, v6
+; GFX8-GISEL-NEXT: v_or_b32_e32 v7, v13, v7
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_v16f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v8
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v9, v9
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v8
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v10, v10
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v8
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v11, v11
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v8
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v12, v12
+; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v8
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v13, v13
+; GFX900-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX900-SDAG-NEXT: v_pk_max_f16 v5, v5, v8
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v14, v14
+; GFX900-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX900-SDAG-NEXT: v_pk_max_f16 v6, v6, v8
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v15, v15
+; GFX900-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX900-SDAG-NEXT: v_pk_max_f16 v7, v7, v8
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_v16f16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX9-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX9-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX9-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX9-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX9-GISEL-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX9-GISEL-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX9-GISEL-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX9-GISEL-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX9-GISEL-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX9-GISEL-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX9-GISEL-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX9-GISEL-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v8
+; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v9
+; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v10
+; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v11
+; GFX9-GISEL-NEXT: v_pk_max_f16 v4, v4, v12
+; GFX9-GISEL-NEXT: v_pk_max_f16 v5, v5, v13
+; GFX9-GISEL-NEXT: v_pk_max_f16 v6, v6, v14
+; GFX9-GISEL-NEXT: v_pk_max_f16 v7, v7, v15
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v16f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v8
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v9, v9
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v8
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v10, v10
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v8
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v11, v11
+; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v8
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v12, v12
+; GFX950-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v8
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v13, v13
+; GFX950-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX950-SDAG-NEXT: v_pk_max_f16 v5, v5, v8
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v14, v14
+; GFX950-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX950-SDAG-NEXT: v_pk_max_f16 v6, v6, v8
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v15, v15
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v7, v7, v8
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_v16f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-SDAG-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-SDAG-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v8
+; GFX10-SDAG-NEXT: v_pk_max_f16 v8, v11, v11
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v9
+; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v10
+; GFX10-SDAG-NEXT: v_pk_max_f16 v9, v12, v12
+; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX10-SDAG-NEXT: v_pk_max_f16 v10, v13, v13
+; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX10-SDAG-NEXT: v_pk_max_f16 v11, v14, v14
+; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX10-SDAG-NEXT: v_pk_max_f16 v12, v15, v15
+; GFX10-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v8
+; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v9
+; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v10
+; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v11
+; GFX10-SDAG-NEXT: v_pk_max_f16 v7, v7, v12
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_v16f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX10-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX10-GISEL-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX10-GISEL-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX10-GISEL-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX10-GISEL-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX10-GISEL-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX10-GISEL-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX10-GISEL-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX10-GISEL-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v8
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v9
+; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v10
+; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v11
+; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v12
+; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v13
+; GFX10-GISEL-NEXT: v_pk_max_f16 v6, v6, v14
+; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v15
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_v16f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-SDAG-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-SDAG-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v8
+; GFX11-SDAG-NEXT: v_pk_max_f16 v8, v11, v11
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v9
+; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v10
+; GFX11-SDAG-NEXT: v_pk_max_f16 v9, v12, v12
+; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX11-SDAG-NEXT: v_pk_max_f16 v10, v13, v13
+; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX11-SDAG-NEXT: v_pk_max_f16 v11, v14, v14
+; GFX11-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX11-SDAG-NEXT: v_pk_max_f16 v12, v15, v15
+; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v8
+; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v9
+; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v10
+; GFX11-SDAG-NEXT: v_pk_max_f16 v6, v6, v11
+; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v12
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_v16f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX11-GISEL-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX11-GISEL-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX11-GISEL-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX11-GISEL-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX11-GISEL-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX11-GISEL-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX11-GISEL-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX11-GISEL-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v8
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v9
+; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v10
+; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v11
+; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v12
+; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v13
+; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v14
+; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v15
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_v16f16:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v8
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v9
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v10
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v8
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v8, v11, v11
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v9
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v10
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v9, v12, v12
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v10, v13, v13
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v11, v14, v14
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v12, v15, v15
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v8
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v9
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v10
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v11
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v12
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_v16f16:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v8
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v9
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v10
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v11, v11, v11
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v12, v12, v12
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v13, v13, v13
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v14, v14, v14
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v15, v15, v15
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v8
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v9
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v10
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v11
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v12
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v13
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v14
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v15
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %result = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> %x, <16 x half> %y)
+ ret <16 x half> %result
+}
+
+define <32 x half> @v_maximumnum_v32f16(<32 x half> %x, <32 x half> %y) {
+; GFX7-SDAG-LABEL: v_maximumnum_v32f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v12, v12
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v13, v13
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v14, v14
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v15, v15
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v12, v12
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v13, v13
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v14, v14
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v15, v15
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v16
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v17
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v18
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v19
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v20, v20
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v21, v21
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v22, v22
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v23, v23
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v20, v20
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v21, v21
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v22, v22
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v23, v23
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v24, v24
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v25, v25
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v26, v26
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v27, v27
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v24, v24
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v25, v25
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v26, v26
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v27, v27
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v28, v28
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v29, v29
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v30, v30
+; GFX7-SDAG-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:128
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v28, v28
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v29, v29
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v30, v30
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v32, v32
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v32, v32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v3, v3, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v4, v4, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v5, v5, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v6, v6, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v7, v7, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v8, v8, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:40
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v9, v9, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v10, v10, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v11, v11, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v12, v12, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:56
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v13, v13, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v14, v14, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v15, v15, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v16, v16, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:72
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v17, v17, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v18, v18, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:80
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v19, v19, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v20, v20, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:88
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v21, v21, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v22, v22, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:96
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v23, v23, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v24, v24, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:104
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v25, v25, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v26, v26, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:112
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v27, v27, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v28, v28, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:120
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v29, v29, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:124
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v30, v30, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_max_f32_e32 v31, v31, v32
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v32f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v12, v12
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v13, v13
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v14, v14
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v15, v15
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v16
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v17
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v18
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v19
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v20, v20
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v21, v21
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v22, v22
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v23, v23
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v24, v24
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v25, v25
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v26, v26
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v27, v27
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v28, v28
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v29, v29
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v30, v30
+; GFX7-GISEL-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:128
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v32, v32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:16
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v3, v3, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v4, v4, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v5, v5, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v6, v6, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:32
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v7, v7, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v8, v8, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:40
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v8, v8
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v9, v9, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v9, v9
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v10, v10, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:48
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v10, v10
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v11, v11, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v11, v11
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v12, v12, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:56
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v12, v12
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v13, v13, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v13, v13
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v14, v14, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:64
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v14, v14
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v15, v15, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v15, v15
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v16, v16, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:72
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v16, v16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v17, v17, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v17, v17
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v18, v18, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:80
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v18, v18
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v19, v19, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v19, v19
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v20, v20, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:88
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v20, v20
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v21, v21, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v21, v21
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v22, v22, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:96
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v22, v22
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v23, v23, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v23, v23
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v24, v24, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:104
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v24, v24
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v25, v25, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v25, v25
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v26, v26, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:112
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v26, v26
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v27, v27, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v27, v27
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v28, v28, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:120
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v28, v28
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v29, v29, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:124
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v29, v29
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v30, v30, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v30, v30
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_max_f32_e32 v31, v31, v32
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v32f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v38, v27, v27 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v39, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v48, v26, v26 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v49, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v50, v25, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v51, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v40, v22, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v41, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v58, v17, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v59, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v17, v17, v17
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v52, v24, v24 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v53, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v54, v23, v23 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v55, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v42, v21, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v43, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v44, v20, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v45, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v46, v19, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v47, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v56, v18, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v57, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v38, v39, v38 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v39, v49, v48 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v48, v51, v50 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v51, v41, v40 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v40, v59, v58 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v17
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v49, v53, v52 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v50, v55, v54 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v52, v43, v42 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v53, v45, v44 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v54, v47, v46 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v55, v57, v56 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v40
+; GFX8-SDAG-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v32, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v33, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v34, v29, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v35, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v36, v28, v28 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v37, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v32, v33, v32 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v33, v16, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v34, v35, v34 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v35, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v36, v37, v36 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v37, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v15, v15, v15
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v33, v35, v33 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_e32 v30, v30, v30
+; GFX8-SDAG-NEXT: v_max_f16_e32 v14, v14, v14
+; GFX8-SDAG-NEXT: v_max_f16_e32 v29, v29, v29
+; GFX8-SDAG-NEXT: v_max_f16_e32 v13, v13, v13
+; GFX8-SDAG-NEXT: v_max_f16_e32 v28, v28, v28
+; GFX8-SDAG-NEXT: v_max_f16_e32 v12, v12, v12
+; GFX8-SDAG-NEXT: v_max_f16_e32 v27, v27, v27
+; GFX8-SDAG-NEXT: v_max_f16_e32 v11, v11, v11
+; GFX8-SDAG-NEXT: v_max_f16_e32 v26, v26, v26
+; GFX8-SDAG-NEXT: v_max_f16_e32 v10, v10, v10
+; GFX8-SDAG-NEXT: v_max_f16_e32 v25, v25, v25
+; GFX8-SDAG-NEXT: v_max_f16_e32 v9, v9, v9
+; GFX8-SDAG-NEXT: v_max_f16_e32 v24, v24, v24
+; GFX8-SDAG-NEXT: v_max_f16_e32 v8, v8, v8
+; GFX8-SDAG-NEXT: v_max_f16_e32 v23, v23, v23
+; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v7
+; GFX8-SDAG-NEXT: v_max_f16_e32 v22, v22, v22
+; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v6
+; GFX8-SDAG-NEXT: v_max_f16_e32 v21, v21, v21
+; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5
+; GFX8-SDAG-NEXT: v_max_f16_e32 v20, v20, v20
+; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4
+; GFX8-SDAG-NEXT: v_max_f16_e32 v19, v19, v19
+; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3
+; GFX8-SDAG-NEXT: v_max_f16_e32 v18, v18, v18
+; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2
+; GFX8-SDAG-NEXT: v_max_f16_e32 v16, v16, v16
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_max_f16_e32 v14, v14, v30
+; GFX8-SDAG-NEXT: v_max_f16_e32 v13, v13, v29
+; GFX8-SDAG-NEXT: v_max_f16_e32 v12, v12, v28
+; GFX8-SDAG-NEXT: v_max_f16_e32 v11, v11, v27
+; GFX8-SDAG-NEXT: v_max_f16_e32 v10, v10, v26
+; GFX8-SDAG-NEXT: v_max_f16_e32 v9, v9, v25
+; GFX8-SDAG-NEXT: v_max_f16_e32 v8, v8, v24
+; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v23
+; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v22
+; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v21
+; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v20
+; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v19
+; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v18
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v16
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v33
+; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v55
+; GFX8-SDAG-NEXT: v_or_b32_e32 v3, v3, v54
+; GFX8-SDAG-NEXT: v_or_b32_e32 v4, v4, v53
+; GFX8-SDAG-NEXT: v_or_b32_e32 v5, v5, v52
+; GFX8-SDAG-NEXT: v_or_b32_e32 v6, v6, v51
+; GFX8-SDAG-NEXT: v_or_b32_e32 v7, v7, v50
+; GFX8-SDAG-NEXT: v_or_b32_e32 v8, v8, v49
+; GFX8-SDAG-NEXT: v_or_b32_e32 v9, v9, v48
+; GFX8-SDAG-NEXT: v_or_b32_e32 v10, v10, v39
+; GFX8-SDAG-NEXT: v_or_b32_e32 v11, v11, v38
+; GFX8-SDAG-NEXT: v_or_b32_e32 v12, v12, v36
+; GFX8-SDAG-NEXT: v_or_b32_e32 v13, v13, v34
+; GFX8-SDAG-NEXT: v_or_b32_e32 v14, v14, v32
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v35, v31, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v31, v31, v31
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v35, v37, v35 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_e32 v15, v15, v31
+; GFX8-SDAG-NEXT: v_or_b32_e32 v15, v15, v35
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v32f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v31, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v16, v16
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v16, v16, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v31, v31, v32
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v16, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v17, v17
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v17, v17, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v16, v16, v32
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v2, v2
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v18, v18
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v18, v18, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v17, v32
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v3, v3
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v19, v19
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v19, v19, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v18, v32
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v4, v4
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v20, v20
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v20, v20, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v19, v32
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v20, v5, v5
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v21, v21
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v21, v21, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v20, v20, v32
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v21, v6, v6
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v22, v22
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v22, v22, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v21, v21, v32
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v22, v7, v7
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v23, v23
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v23, v23, v23 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v22, v22, v32
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v23, v8, v8
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v24, v24
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v8, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v24, v24, v24 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v23, v23, v32
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v8, v8, v24 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v24, v9, v9
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v25, v25
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v9, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v25, v25, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v24, v24, v32
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v9, v9, v25 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v25, v10, v10
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v26, v26
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v10, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v26, v26, v26 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v25, v25, v32
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v10, v10, v26 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v26, v11, v11
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v27, v27
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v11, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v27, v27, v27 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v26, v26, v32
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v11, v11, v27 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v27, v12, v12
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v28, v28
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v12, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v28, v28, v28 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v27, v27, v32
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v12, v12, v28 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v28, v13, v13
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v29, v29
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v13, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v29, v29, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v28, v28, v32
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v13, v13, v29 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v29, v14, v14
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v30, v30
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v14, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v30, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v14, v14, v30 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: buffer_load_dword v30, off, s[0:3], s32
+; GFX8-GISEL-NEXT: v_max_f16_e32 v29, v29, v32
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v15, v15
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v15, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v31, v0
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v16, v1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v17, v2
+; GFX8-GISEL-NEXT: v_or_b32_e32 v3, v18, v3
+; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v19, v4
+; GFX8-GISEL-NEXT: v_or_b32_e32 v5, v20, v5
+; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v21, v6
+; GFX8-GISEL-NEXT: v_or_b32_e32 v7, v22, v7
+; GFX8-GISEL-NEXT: v_or_b32_e32 v8, v23, v8
+; GFX8-GISEL-NEXT: v_or_b32_e32 v9, v24, v9
+; GFX8-GISEL-NEXT: v_or_b32_e32 v10, v25, v10
+; GFX8-GISEL-NEXT: v_or_b32_e32 v11, v26, v11
+; GFX8-GISEL-NEXT: v_or_b32_e32 v12, v27, v12
+; GFX8-GISEL-NEXT: v_or_b32_e32 v13, v28, v13
+; GFX8-GISEL-NEXT: v_or_b32_e32 v14, v29, v14
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v33, v30, v30
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v30, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v32, v33
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v15, v15, v30 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v15, v32, v15
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_v32f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v16
+; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v17, v17
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v16
+; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v18, v18
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v16
+; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v19, v19
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v16
+; GFX900-SDAG-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX900-SDAG-NEXT: v_pk_max_f16 v17, v20, v20
+; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX900-SDAG-NEXT: v_pk_max_f16 v18, v21, v21
+; GFX900-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX900-SDAG-NEXT: v_pk_max_f16 v19, v22, v22
+; GFX900-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX900-SDAG-NEXT: v_pk_max_f16 v20, v23, v23
+; GFX900-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX900-SDAG-NEXT: v_pk_max_f16 v21, v24, v24
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX900-SDAG-NEXT: v_pk_max_f16 v22, v25, v25
+; GFX900-SDAG-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX900-SDAG-NEXT: v_pk_max_f16 v23, v26, v26
+; GFX900-SDAG-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX900-SDAG-NEXT: v_pk_max_f16 v24, v27, v27
+; GFX900-SDAG-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX900-SDAG-NEXT: v_pk_max_f16 v25, v28, v28
+; GFX900-SDAG-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX900-SDAG-NEXT: v_pk_max_f16 v26, v29, v29
+; GFX900-SDAG-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX900-SDAG-NEXT: v_pk_max_f16 v27, v30, v30
+; GFX900-SDAG-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX900-SDAG-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v17
+; GFX900-SDAG-NEXT: v_pk_max_f16 v5, v5, v18
+; GFX900-SDAG-NEXT: v_pk_max_f16 v6, v6, v19
+; GFX900-SDAG-NEXT: v_pk_max_f16 v7, v7, v20
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v8, v21
+; GFX900-SDAG-NEXT: v_pk_max_f16 v9, v9, v22
+; GFX900-SDAG-NEXT: v_pk_max_f16 v10, v10, v23
+; GFX900-SDAG-NEXT: v_pk_max_f16 v11, v11, v24
+; GFX900-SDAG-NEXT: v_pk_max_f16 v12, v12, v25
+; GFX900-SDAG-NEXT: v_pk_max_f16 v13, v13, v26
+; GFX900-SDAG-NEXT: v_pk_max_f16 v14, v14, v27
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX900-SDAG-NEXT: v_pk_max_f16 v15, v15, v16
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v32f16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v16
+; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v17, v17
+; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v16
+; GFX900-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v18, v18
+; GFX900-GISEL-NEXT: v_pk_max_f16 v2, v2, v16
+; GFX900-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v19, v19
+; GFX900-GISEL-NEXT: v_pk_max_f16 v3, v3, v16
+; GFX900-GISEL-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX900-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX900-GISEL-NEXT: v_pk_max_f16 v17, v20, v20
+; GFX900-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX900-GISEL-NEXT: v_pk_max_f16 v18, v21, v21
+; GFX900-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX900-GISEL-NEXT: v_pk_max_f16 v19, v22, v22
+; GFX900-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX900-GISEL-NEXT: v_pk_max_f16 v20, v23, v23
+; GFX900-GISEL-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX900-GISEL-NEXT: v_pk_max_f16 v21, v24, v24
+; GFX900-GISEL-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX900-GISEL-NEXT: v_pk_max_f16 v22, v25, v25
+; GFX900-GISEL-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX900-GISEL-NEXT: v_pk_max_f16 v23, v26, v26
+; GFX900-GISEL-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX900-GISEL-NEXT: v_pk_max_f16 v24, v27, v27
+; GFX900-GISEL-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX900-GISEL-NEXT: v_pk_max_f16 v25, v28, v28
+; GFX900-GISEL-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX900-GISEL-NEXT: v_pk_max_f16 v26, v29, v29
+; GFX900-GISEL-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX900-GISEL-NEXT: v_pk_max_f16 v27, v30, v30
+; GFX900-GISEL-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX900-GISEL-NEXT: v_pk_max_f16 v4, v4, v17
+; GFX900-GISEL-NEXT: v_pk_max_f16 v5, v5, v18
+; GFX900-GISEL-NEXT: v_pk_max_f16 v6, v6, v19
+; GFX900-GISEL-NEXT: v_pk_max_f16 v7, v7, v20
+; GFX900-GISEL-NEXT: v_pk_max_f16 v8, v8, v21
+; GFX900-GISEL-NEXT: v_pk_max_f16 v9, v9, v22
+; GFX900-GISEL-NEXT: v_pk_max_f16 v10, v10, v23
+; GFX900-GISEL-NEXT: v_pk_max_f16 v11, v11, v24
+; GFX900-GISEL-NEXT: v_pk_max_f16 v12, v12, v25
+; GFX900-GISEL-NEXT: v_pk_max_f16 v13, v13, v26
+; GFX900-GISEL-NEXT: v_pk_max_f16 v14, v14, v27
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX900-GISEL-NEXT: v_pk_max_f16 v15, v15, v16
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v32f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: scratch_load_dword v31, off, s32
+; GFX950-SDAG-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v17, v17, v17
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_max_f16 v18, v18, v18
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-SDAG-NEXT: v_pk_max_f16 v19, v19, v19
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v20, v20, v20
+; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX950-SDAG-NEXT: v_pk_max_f16 v21, v21, v21
+; GFX950-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX950-SDAG-NEXT: v_pk_max_f16 v22, v22, v22
+; GFX950-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX950-SDAG-NEXT: v_pk_max_f16 v23, v23, v23
+; GFX950-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX950-SDAG-NEXT: v_pk_max_f16 v24, v24, v24
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX950-SDAG-NEXT: v_pk_max_f16 v25, v25, v25
+; GFX950-SDAG-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX950-SDAG-NEXT: v_pk_max_f16 v26, v26, v26
+; GFX950-SDAG-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX950-SDAG-NEXT: v_pk_max_f16 v27, v27, v27
+; GFX950-SDAG-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX950-SDAG-NEXT: v_pk_max_f16 v28, v28, v28
+; GFX950-SDAG-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX950-SDAG-NEXT: v_pk_max_f16 v29, v29, v29
+; GFX950-SDAG-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX950-SDAG-NEXT: v_pk_max_f16 v30, v30, v30
+; GFX950-SDAG-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX950-SDAG-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v16
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v17
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v18
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v19
+; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v20
+; GFX950-SDAG-NEXT: v_pk_max_f16 v5, v5, v21
+; GFX950-SDAG-NEXT: v_pk_max_f16 v6, v6, v22
+; GFX950-SDAG-NEXT: v_pk_max_f16 v7, v7, v23
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v8, v24
+; GFX950-SDAG-NEXT: v_pk_max_f16 v9, v9, v25
+; GFX950-SDAG-NEXT: v_pk_max_f16 v10, v10, v26
+; GFX950-SDAG-NEXT: v_pk_max_f16 v11, v11, v27
+; GFX950-SDAG-NEXT: v_pk_max_f16 v12, v12, v28
+; GFX950-SDAG-NEXT: v_pk_max_f16 v13, v13, v29
+; GFX950-SDAG-NEXT: v_pk_max_f16 v14, v14, v30
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v16, v31, v31
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v15, v15, v16
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v32f16:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: scratch_load_dword v31, off, s32
+; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-GISEL-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX950-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX950-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX950-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX950-GISEL-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX950-GISEL-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX950-GISEL-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX950-GISEL-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX950-GISEL-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX950-GISEL-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX950-GISEL-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX950-GISEL-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX950-GISEL-NEXT: v_pk_max_f16 v17, v17, v17
+; GFX950-GISEL-NEXT: v_pk_max_f16 v18, v18, v18
+; GFX950-GISEL-NEXT: v_pk_max_f16 v19, v19, v19
+; GFX950-GISEL-NEXT: v_pk_max_f16 v20, v20, v20
+; GFX950-GISEL-NEXT: v_pk_max_f16 v21, v21, v21
+; GFX950-GISEL-NEXT: v_pk_max_f16 v22, v22, v22
+; GFX950-GISEL-NEXT: v_pk_max_f16 v23, v23, v23
+; GFX950-GISEL-NEXT: v_pk_max_f16 v24, v24, v24
+; GFX950-GISEL-NEXT: v_pk_max_f16 v25, v25, v25
+; GFX950-GISEL-NEXT: v_pk_max_f16 v26, v26, v26
+; GFX950-GISEL-NEXT: v_pk_max_f16 v27, v27, v27
+; GFX950-GISEL-NEXT: v_pk_max_f16 v28, v28, v28
+; GFX950-GISEL-NEXT: v_pk_max_f16 v29, v29, v29
+; GFX950-GISEL-NEXT: v_pk_max_f16 v30, v30, v30
+; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v16
+; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v17
+; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v18
+; GFX950-GISEL-NEXT: v_pk_max_f16 v3, v3, v19
+; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v4, v20
+; GFX950-GISEL-NEXT: v_pk_max_f16 v5, v5, v21
+; GFX950-GISEL-NEXT: v_pk_max_f16 v6, v6, v22
+; GFX950-GISEL-NEXT: v_pk_max_f16 v7, v7, v23
+; GFX950-GISEL-NEXT: v_pk_max_f16 v8, v8, v24
+; GFX950-GISEL-NEXT: v_pk_max_f16 v9, v9, v25
+; GFX950-GISEL-NEXT: v_pk_max_f16 v10, v10, v26
+; GFX950-GISEL-NEXT: v_pk_max_f16 v11, v11, v27
+; GFX950-GISEL-NEXT: v_pk_max_f16 v12, v12, v28
+; GFX950-GISEL-NEXT: v_pk_max_f16 v13, v13, v29
+; GFX950-GISEL-NEXT: v_pk_max_f16 v14, v14, v30
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-GISEL-NEXT: v_pk_max_f16 v16, v31, v31
+; GFX950-GISEL-NEXT: s_nop 0
+; GFX950-GISEL-NEXT: v_pk_max_f16 v15, v15, v16
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_v32f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX10-SDAG-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-SDAG-NEXT: v_pk_max_f16 v17, v17, v17
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-SDAG-NEXT: v_pk_max_f16 v18, v18, v18
+; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-SDAG-NEXT: v_pk_max_f16 v19, v19, v19
+; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-SDAG-NEXT: v_pk_max_f16 v20, v20, v20
+; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX10-SDAG-NEXT: v_pk_max_f16 v21, v21, v21
+; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX10-SDAG-NEXT: v_pk_max_f16 v22, v22, v22
+; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX10-SDAG-NEXT: v_pk_max_f16 v23, v23, v23
+; GFX10-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX10-SDAG-NEXT: v_pk_max_f16 v24, v24, v24
+; GFX10-SDAG-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX10-SDAG-NEXT: v_pk_max_f16 v25, v25, v25
+; GFX10-SDAG-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX10-SDAG-NEXT: v_pk_max_f16 v26, v26, v26
+; GFX10-SDAG-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX10-SDAG-NEXT: v_pk_max_f16 v27, v27, v27
+; GFX10-SDAG-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX10-SDAG-NEXT: v_pk_max_f16 v28, v28, v28
+; GFX10-SDAG-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX10-SDAG-NEXT: v_pk_max_f16 v29, v29, v29
+; GFX10-SDAG-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX10-SDAG-NEXT: v_pk_max_f16 v30, v30, v30
+; GFX10-SDAG-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX10-SDAG-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v16
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v17
+; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v18
+; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v19
+; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v20
+; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v21
+; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v22
+; GFX10-SDAG-NEXT: v_pk_max_f16 v7, v7, v23
+; GFX10-SDAG-NEXT: v_pk_max_f16 v8, v8, v24
+; GFX10-SDAG-NEXT: v_pk_max_f16 v9, v9, v25
+; GFX10-SDAG-NEXT: v_pk_max_f16 v10, v10, v26
+; GFX10-SDAG-NEXT: v_pk_max_f16 v11, v11, v27
+; GFX10-SDAG-NEXT: v_pk_max_f16 v12, v12, v28
+; GFX10-SDAG-NEXT: v_pk_max_f16 v13, v13, v29
+; GFX10-SDAG-NEXT: v_pk_max_f16 v14, v14, v30
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_max_f16 v16, v31, v31
+; GFX10-SDAG-NEXT: v_pk_max_f16 v15, v15, v16
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_v32f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-GISEL-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX10-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX10-GISEL-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX10-GISEL-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX10-GISEL-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX10-GISEL-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX10-GISEL-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX10-GISEL-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX10-GISEL-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX10-GISEL-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX10-GISEL-NEXT: v_pk_max_f16 v17, v17, v17
+; GFX10-GISEL-NEXT: v_pk_max_f16 v18, v18, v18
+; GFX10-GISEL-NEXT: v_pk_max_f16 v19, v19, v19
+; GFX10-GISEL-NEXT: v_pk_max_f16 v20, v20, v20
+; GFX10-GISEL-NEXT: v_pk_max_f16 v21, v21, v21
+; GFX10-GISEL-NEXT: v_pk_max_f16 v22, v22, v22
+; GFX10-GISEL-NEXT: v_pk_max_f16 v23, v23, v23
+; GFX10-GISEL-NEXT: v_pk_max_f16 v24, v24, v24
+; GFX10-GISEL-NEXT: v_pk_max_f16 v25, v25, v25
+; GFX10-GISEL-NEXT: v_pk_max_f16 v26, v26, v26
+; GFX10-GISEL-NEXT: v_pk_max_f16 v27, v27, v27
+; GFX10-GISEL-NEXT: v_pk_max_f16 v28, v28, v28
+; GFX10-GISEL-NEXT: v_pk_max_f16 v29, v29, v29
+; GFX10-GISEL-NEXT: v_pk_max_f16 v30, v30, v30
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v16
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v17
+; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v18
+; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v19
+; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v20
+; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v21
+; GFX10-GISEL-NEXT: v_pk_max_f16 v6, v6, v22
+; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v23
+; GFX10-GISEL-NEXT: v_pk_max_f16 v8, v8, v24
+; GFX10-GISEL-NEXT: v_pk_max_f16 v9, v9, v25
+; GFX10-GISEL-NEXT: v_pk_max_f16 v10, v10, v26
+; GFX10-GISEL-NEXT: v_pk_max_f16 v11, v11, v27
+; GFX10-GISEL-NEXT: v_pk_max_f16 v12, v12, v28
+; GFX10-GISEL-NEXT: v_pk_max_f16 v13, v13, v29
+; GFX10-GISEL-NEXT: v_pk_max_f16 v14, v14, v30
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_max_f16 v16, v31, v31
+; GFX10-GISEL-NEXT: v_pk_max_f16 v15, v15, v16
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_v32f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: scratch_load_b32 v31, off, s32
+; GFX11-SDAG-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-SDAG-NEXT: v_pk_max_f16 v17, v17, v17
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-SDAG-NEXT: v_pk_max_f16 v18, v18, v18
+; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-SDAG-NEXT: v_pk_max_f16 v19, v19, v19
+; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-SDAG-NEXT: v_pk_max_f16 v20, v20, v20
+; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX11-SDAG-NEXT: v_pk_max_f16 v21, v21, v21
+; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX11-SDAG-NEXT: v_pk_max_f16 v22, v22, v22
+; GFX11-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX11-SDAG-NEXT: v_pk_max_f16 v23, v23, v23
+; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX11-SDAG-NEXT: v_pk_max_f16 v24, v24, v24
+; GFX11-SDAG-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX11-SDAG-NEXT: v_pk_max_f16 v25, v25, v25
+; GFX11-SDAG-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX11-SDAG-NEXT: v_pk_max_f16 v26, v26, v26
+; GFX11-SDAG-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX11-SDAG-NEXT: v_pk_max_f16 v27, v27, v27
+; GFX11-SDAG-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX11-SDAG-NEXT: v_pk_max_f16 v28, v28, v28
+; GFX11-SDAG-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX11-SDAG-NEXT: v_pk_max_f16 v29, v29, v29
+; GFX11-SDAG-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX11-SDAG-NEXT: v_pk_max_f16 v30, v30, v30
+; GFX11-SDAG-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX11-SDAG-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v16
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v17
+; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v18
+; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v19
+; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v20
+; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v21
+; GFX11-SDAG-NEXT: v_pk_max_f16 v6, v6, v22
+; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v23
+; GFX11-SDAG-NEXT: v_pk_max_f16 v8, v8, v24
+; GFX11-SDAG-NEXT: v_pk_max_f16 v9, v9, v25
+; GFX11-SDAG-NEXT: v_pk_max_f16 v10, v10, v26
+; GFX11-SDAG-NEXT: v_pk_max_f16 v11, v11, v27
+; GFX11-SDAG-NEXT: v_pk_max_f16 v12, v12, v28
+; GFX11-SDAG-NEXT: v_pk_max_f16 v13, v13, v29
+; GFX11-SDAG-NEXT: v_pk_max_f16 v14, v14, v30
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v16, v31, v31
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v15, v15, v16
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_v32f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: scratch_load_b32 v31, off, s32
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-GISEL-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX11-GISEL-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX11-GISEL-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX11-GISEL-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX11-GISEL-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX11-GISEL-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX11-GISEL-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX11-GISEL-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX11-GISEL-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX11-GISEL-NEXT: v_pk_max_f16 v17, v17, v17
+; GFX11-GISEL-NEXT: v_pk_max_f16 v18, v18, v18
+; GFX11-GISEL-NEXT: v_pk_max_f16 v19, v19, v19
+; GFX11-GISEL-NEXT: v_pk_max_f16 v20, v20, v20
+; GFX11-GISEL-NEXT: v_pk_max_f16 v21, v21, v21
+; GFX11-GISEL-NEXT: v_pk_max_f16 v22, v22, v22
+; GFX11-GISEL-NEXT: v_pk_max_f16 v23, v23, v23
+; GFX11-GISEL-NEXT: v_pk_max_f16 v24, v24, v24
+; GFX11-GISEL-NEXT: v_pk_max_f16 v25, v25, v25
+; GFX11-GISEL-NEXT: v_pk_max_f16 v26, v26, v26
+; GFX11-GISEL-NEXT: v_pk_max_f16 v27, v27, v27
+; GFX11-GISEL-NEXT: v_pk_max_f16 v28, v28, v28
+; GFX11-GISEL-NEXT: v_pk_max_f16 v29, v29, v29
+; GFX11-GISEL-NEXT: v_pk_max_f16 v30, v30, v30
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v16
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v17
+; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v18
+; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v19
+; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v20
+; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v21
+; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v22
+; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v23
+; GFX11-GISEL-NEXT: v_pk_max_f16 v8, v8, v24
+; GFX11-GISEL-NEXT: v_pk_max_f16 v9, v9, v25
+; GFX11-GISEL-NEXT: v_pk_max_f16 v10, v10, v26
+; GFX11-GISEL-NEXT: v_pk_max_f16 v11, v11, v27
+; GFX11-GISEL-NEXT: v_pk_max_f16 v12, v12, v28
+; GFX11-GISEL-NEXT: v_pk_max_f16 v13, v13, v29
+; GFX11-GISEL-NEXT: v_pk_max_f16 v14, v14, v30
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v16, v31, v31
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v15, v15, v16
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_v32f16:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: scratch_load_b32 v31, off, s32
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v16, v16, v16
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v17, v17, v17
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v18, v18, v18
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v19, v19, v19
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v20, v20, v20
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v21, v21, v21
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v22, v22, v22
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v23, v23, v23
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v24, v24, v24
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v8
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v25, v25, v25
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v9
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v26, v26, v26
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v10
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v27, v27, v27
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v11, v11, v11
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v28, v28, v28
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v12, v12, v12
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v29, v29, v29
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v13, v13, v13
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v30, v30, v30
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v14, v14, v14
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v15, v15, v15
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v16
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v17
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v18
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v19
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v20
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v21
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v22
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v23
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v24
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v25
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v26
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v11, v11, v27
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v12, v12, v28
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v13, v13, v29
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v14, v14, v30
+; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v16, v31, v31
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v15, v15, v16
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_v32f16:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: scratch_load_b32 v31, off, s32
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v16, v16, v16
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v8
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v9
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v10
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v11, v11, v11
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v12, v12, v12
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v13, v13, v13
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v14, v14, v14
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v15, v15, v15
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v17, v17, v17
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v18, v18, v18
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v19, v19, v19
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v20, v20, v20
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v21, v21, v21
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v22, v22, v22
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v23, v23, v23
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v24, v24, v24
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v25, v25, v25
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v26, v26, v26
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v27, v27, v27
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v28, v28, v28
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v29, v29, v29
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v30, v30, v30
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v16
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v17
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v18
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v19
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v20
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v21
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v22
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v23
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v24
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v25
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v26
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v11, v11, v27
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v12, v12, v28
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v13, v13, v29
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v14, v14, v30
+; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v16, v31, v31
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v15, v15, v16
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %result = call <32 x half> @llvm.maximumnum.v32f16(<32 x half> %x, <32 x half> %y)
+ ret <32 x half> %result
+}
+
+define <2 x float> @v_maximumnum_v2f32(<2 x float> %x, <2 x float> %y) {
+; GFX7-SDAG-LABEL: v_maximumnum_v2f32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v3
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v2
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v2f32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v2f32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v3
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-SDAG-NEXT: v_max_f32_e32 v1, v1, v2
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v2f32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX8-GISEL-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_v2f32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v2
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v2f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v2f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v2
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_v2f32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_v2f32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX10-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_v2f32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
+; GFX11-SDAG-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_v2f32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
+; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_v2f32:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_v2f32:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> %x, <2 x float> %y)
ret <2 x float> %result
}
@@ -3884,87 +6766,190 @@ define <2 x float> @v_maximumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) {
}
define <3 x float> @v_maximumnum_v3f32(<3 x float> %x, <3 x float> %y) {
-; GFX7-LABEL: v_maximumnum_v3f32:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v4
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v5
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX7-NEXT: v_max_f32_e32 v2, v2, v3
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v3f32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v4
-; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX8-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v5
-; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX8-NEXT: v_max_f32_e32 v2, v2, v3
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_v3f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX9-NEXT: v_max_f32_e32 v3, v4, v4
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX9-NEXT: v_max_f32_e32 v3, v5, v5
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_v3f32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f32_e32 v4, v4, v4
-; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX10-NEXT: v_max_f32_e32 v5, v5, v5
-; GFX10-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX10-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX10-NEXT: v_max_f32_e32 v2, v2, v5
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_v3f32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_max_f32 v3, v3, v3 :: v_dual_max_f32 v0, v0, v0
-; GFX11-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v1, v1, v1
-; GFX11-NEXT: v_dual_max_f32 v5, v5, v5 :: v_dual_max_f32 v2, v2, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4
-; GFX11-NEXT: v_max_f32_e32 v2, v2, v5
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_v3f32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_dual_max_num_f32 v3, v3, v3 :: v_dual_max_num_f32 v0, v0, v0
-; GFX12-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v1, v1, v1
-; GFX12-NEXT: v_dual_max_num_f32 v5, v5, v5 :: v_dual_max_num_f32 v2, v2, v2
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v3 :: v_dual_max_num_f32 v1, v1, v4
-; GFX12-NEXT: v_max_num_f32_e32 v2, v2, v5
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_v3f32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v4
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v5
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v3
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v3f32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v4
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v5, 1.0, v5
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v3f32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v4
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-SDAG-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v5
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX8-SDAG-NEXT: v_max_f32_e32 v2, v2, v3
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v3f32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v4
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v5, 1.0, v5
+; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX8-GISEL-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX8-GISEL-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_v3f32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v3
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v3f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v5, v5, v5
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v3f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_mov_b32_e32 v6, v3
+; GFX950-GISEL-NEXT: v_mov_b32_e32 v7, v4
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[6:7], 1.0, v[6:7] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v5
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v6, v6
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[4:5], 1.0, v[4:5] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v7, v7
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v3
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_v3f32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-SDAG-NEXT: v_max_f32_e32 v5, v5, v5
+; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_v3f32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX10-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX10-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX10-GISEL-NEXT: v_max_f32_e32 v5, v5, v5
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_v3f32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_max_f32 v3, v3, v3 :: v_dual_max_f32 v0, v0, v0
+; GFX11-SDAG-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v1, v1, v1
+; GFX11-SDAG-NEXT: v_dual_max_f32 v5, v5, v5 :: v_dual_max_f32 v2, v2, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4
+; GFX11-SDAG-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_v3f32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
+; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
+; GFX11-GISEL-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4
+; GFX11-GISEL-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_v3f32:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v3, v3, v3 :: v_dual_max_num_f32 v0, v0, v0
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v1, v1, v1
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v5, v5, v5 :: v_dual_max_num_f32 v2, v2, v2
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v3 :: v_dual_max_num_f32 v1, v1, v4
+; GFX12-SDAG-NEXT: v_max_num_f32_e32 v2, v2, v5
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_v3f32:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v3 :: v_dual_max_num_f32 v1, v1, v4
+; GFX12-GISEL-NEXT: v_max_num_f32_e32 v2, v2, v5
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <3 x float> @llvm.maximumnum.v3f32(<3 x float> %x, <3 x float> %y)
ret <3 x float> %result
}
@@ -4024,101 +7009,218 @@ define <3 x float> @v_maximumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) {
}
define <4 x float> @v_maximumnum_v4f32(<4 x float> %x, <4 x float> %y) {
-; GFX7-LABEL: v_maximumnum_v4f32:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v5
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v6
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX7-NEXT: v_max_f32_e32 v2, v2, v4
-; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v7
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GFX7-NEXT: v_max_f32_e32 v3, v3, v4
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v4f32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v4
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v5
-; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX8-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v6
-; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX8-NEXT: v_max_f32_e32 v2, v2, v4
-; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v7
-; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GFX8-NEXT: v_max_f32_e32 v3, v3, v4
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_v4f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v4, v4, v4
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX9-NEXT: v_max_f32_e32 v4, v5, v5
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX9-NEXT: v_max_f32_e32 v4, v6, v6
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v4
-; GFX9-NEXT: v_max_f32_e32 v4, v7, v7
-; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-NEXT: v_max_f32_e32 v3, v3, v4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_v4f32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e32 v4, v4, v4
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f32_e32 v5, v5, v5
-; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX10-NEXT: v_max_f32_e32 v6, v6, v6
-; GFX10-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX10-NEXT: v_max_f32_e32 v7, v7, v7
-; GFX10-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX10-NEXT: v_max_f32_e32 v1, v1, v5
-; GFX10-NEXT: v_max_f32_e32 v2, v2, v6
-; GFX10-NEXT: v_max_f32_e32 v3, v3, v7
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_v4f32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
-; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
-; GFX11-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7
-; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
-; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_v4f32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
-; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
-; GFX12-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
-; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v4 :: v_dual_max_num_f32 v1, v1, v5
-; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v6 :: v_dual_max_num_f32 v3, v3, v7
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_v4f32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v5
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v6
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v7
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX7-SDAG-NEXT: v_max_f32_e32 v3, v3, v4
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v4f32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v4
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v5, 1.0, v5
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v6, 1.0, v6
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v7, 1.0, v7
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX7-GISEL-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v4f32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v5
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-SDAG-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v6
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX8-SDAG-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v7
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX8-SDAG-NEXT: v_max_f32_e32 v3, v3, v4
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v4f32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v4
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v5, 1.0, v5
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v6, 1.0, v6
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v7, 1.0, v7
+; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX8-GISEL-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX8-GISEL-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX8-GISEL-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_v4f32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v4
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v4f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v5, v5, v5
+; GFX900-GISEL-NEXT: v_max_f32_e32 v6, v6, v6
+; GFX900-GISEL-NEXT: v_max_f32_e32 v7, v7, v7
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v4f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[4:5], 1.0, v[4:5] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[6:7], 1.0, v[6:7] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v4
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_v4f32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f32_e32 v5, v5, v5
+; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-SDAG-NEXT: v_max_f32_e32 v6, v6, v6
+; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX10-SDAG-NEXT: v_max_f32_e32 v7, v7, v7
+; GFX10-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX10-SDAG-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_v4f32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX10-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX10-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX10-GISEL-NEXT: v_max_f32_e32 v5, v5, v5
+; GFX10-GISEL-NEXT: v_max_f32_e32 v6, v6, v6
+; GFX10-GISEL-NEXT: v_max_f32_e32 v7, v7, v7
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX10-GISEL-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_v4f32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
+; GFX11-SDAG-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
+; GFX11-SDAG-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7
+; GFX11-SDAG-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
+; GFX11-SDAG-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_v4f32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
+; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
+; GFX11-GISEL-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
+; GFX11-GISEL-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
+; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_v4f32:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v4 :: v_dual_max_num_f32 v1, v1, v5
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v6 :: v_dual_max_num_f32 v3, v3, v7
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_v4f32:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v4 :: v_dual_max_num_f32 v1, v1, v5
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v6 :: v_dual_max_num_f32 v3, v3, v7
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %result
}
@@ -4182,88 +7284,171 @@ define <4 x float> @v_maximumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) {
}
define <2 x double> @v_maximumnum_v2f64(<2 x double> %x, <2 x double> %y) {
-; GFX7-LABEL: v_maximumnum_v2f64:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v2f64:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_v2f64:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_v2f64:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX950-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7]
-; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5]
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_v2f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_v2f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_v2f64:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
-; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
-; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[4:5]
-; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[6:7]
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_v2f64:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v2f64:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v2f64:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v2f64:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_v2f64:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v2f64:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v2f64:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v2f64:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_v2f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_v2f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_v2f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_v2f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_v2f64:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[4:5]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[6:7]
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_v2f64:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[4:5]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[6:7]
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> %x, <2 x double> %y)
ret <2 x double> %result
}
@@ -4319,109 +7504,213 @@ define <2 x double> @v_maximumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) {
}
define <3 x double> @v_maximumnum_v3f64(<3 x double> %x, <3 x double> %y) {
-; GFX7-LABEL: v_maximumnum_v3f64:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v3f64:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_v3f64:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX900-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX900-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_v3f64:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX950-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9]
-; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX950-NEXT: v_max_f64 v[6:7], v[10:11], v[10:11]
-; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[6:7]
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_v3f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX10-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX10-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_v3f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX11-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX11-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_v3f64:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
-; GFX12-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9]
-; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
-; GFX12-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11]
-; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[6:7]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[8:9]
-; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[10:11]
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_v3f64:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX7-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v3f64:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX7-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v3f64:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX8-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX8-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v3f64:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX8-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_v3f64:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v3f64:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX900-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v3f64:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[10:11], v[10:11]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[6:7]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v3f64:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX950-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_v3f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_v3f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX10-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX10-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX10-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_v3f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_v3f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX11-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_v3f64:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[6:7]
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[8:9]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[10:11]
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_v3f64:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[6:7]
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[8:9]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[10:11]
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <3 x double> @llvm.maximumnum.v3f64(<3 x double> %x, <3 x double> %y)
ret <3 x double> %result
}
@@ -4434,179 +7723,304 @@ define <3 x double> @v_maximumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) {
; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v3f64_nnan:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_v3f64_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_v3f64_nnan:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_v3f64_nnan:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_v3f64_nnan:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[6:7]
-; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[8:9]
-; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[10:11]
-; GFX12-NEXT: s_setpc_b64 s[30:31]
- %result = call nnan <3 x double> @llvm.maximumnum.v3f64(<3 x double> %x, <3 x double> %y)
- ret <3 x double> %result
-}
-
-define <4 x double> @v_maximumnum_v4f64(<4 x double> %x, <4 x double> %y) {
-; GFX7-LABEL: v_maximumnum_v4f64:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX7-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX7-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
-; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
-; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
-; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v4f64:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX8-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX8-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
-; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
-; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
-; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+;
+; GFX8-LABEL: v_maximumnum_v3f64_nnan:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_maximumnum_v4f64:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX900-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX900-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX900-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
-; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
-; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
-; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_v4f64:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX950-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11]
-; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX950-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13]
-; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[8:9]
-; GFX950-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15]
-; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[8:9]
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_v4f64:
+; GFX9-LABEL: v_maximumnum_v3f64_nnan:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_maximumnum_v3f64_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX10-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX10-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX10-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
-; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
-; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
-; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_maximumnum_v4f64:
+; GFX11-LABEL: v_maximumnum_v3f64_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX11-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX11-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX11-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
-; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
-; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
-; GFX12-LABEL: v_maximumnum_v4f64:
+; GFX12-LABEL: v_maximumnum_v3f64_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9]
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
-; GFX12-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11]
-; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
-; GFX12-NEXT: v_max_num_f64_e32 v[12:13], v[12:13], v[12:13]
-; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
-; GFX12-NEXT: v_max_num_f64_e32 v[14:15], v[14:15], v[14:15]
-; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[8:9]
-; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[10:11]
-; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[12:13]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[14:15]
+; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[6:7]
+; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[8:9]
+; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[10:11]
; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = call nnan <3 x double> @llvm.maximumnum.v3f64(<3 x double> %x, <3 x double> %y)
+ ret <3 x double> %result
+}
+
+define <4 x double> @v_maximumnum_v4f64(<4 x double> %x, <4 x double> %y) {
+; GFX7-SDAG-LABEL: v_maximumnum_v4f64:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX7-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX7-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX7-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX7-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX7-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v4f64:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX7-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX7-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX7-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX7-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v4f64:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX8-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX8-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX8-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX8-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX8-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v4f64:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX8-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX8-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX8-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX8-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_v4f64:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX900-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX900-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX900-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v4f64:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX900-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX900-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX900-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX900-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v4f64:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[8:9]
+; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15]
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[8:9]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v4f64:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX950-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX950-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX950-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_v4f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX10-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX10-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX10-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_v4f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX10-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX10-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX10-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX10-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX10-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX10-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_v4f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX11-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX11-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_v4f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX11-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX11-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX11-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_v4f64:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[12:13], v[12:13], v[12:13]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[14:15], v[14:15], v[14:15]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[8:9]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[10:11]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[12:13]
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[14:15]
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_v4f64:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[12:13], v[12:13], v[12:13]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[14:15], v[14:15], v[14:15]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[8:9]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[10:11]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[12:13]
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[14:15]
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <4 x double> @llvm.maximumnum.v4f64(<4 x double> %x, <4 x double> %y)
ret <4 x double> %result
}
@@ -4674,97 +8088,183 @@ define <4 x double> @v_maximumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) {
}
define half @v_maximumnum_f16_no_ieee(half %x, half %y) #0 {
-; GFX7-LABEL: v_maximumnum_f16_no_ieee:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f16_no_ieee:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_f16_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f16_no_ieee:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-TRUE16-LABEL: v_maximumnum_f16_no_ieee:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_maximumnum_f16_no_ieee:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-TRUE16-LABEL: v_maximumnum_f16_no_ieee:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
-; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-FAKE16-LABEL: v_maximumnum_f16_no_ieee:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
-; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f16_no_ieee:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f16_no_ieee:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f16_no_ieee:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f16_no_ieee:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_f16_no_ieee:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_f16_no_ieee:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f16_no_ieee:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f16_no_ieee:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16_no_ieee:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16_no_ieee:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16_no_ieee:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16_no_ieee:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_no_ieee:
+; GFX12-TRUE16-SDAG: ; %bb.0:
+; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16_no_ieee:
+; GFX12-TRUE16-GISEL: ; %bb.0:
+; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16_no_ieee:
+; GFX12-FAKE16-SDAG: ; %bb.0:
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16_no_ieee:
+; GFX12-FAKE16-GISEL: ; %bb.0:
+; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.maximumnum.f16(half %x, half %y)
ret half %result
}
define half @v_maximumnum_f16_nan_no_ieee(half %x, half %y) #0 {
-; GFX7-LABEL: v_maximumnum_f16_nan_no_ieee:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f16_nan_no_ieee:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f16_nan_no_ieee:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximumnum_f16_nan_no_ieee:
; GFX8: ; %bb.0:
@@ -4820,57 +8320,109 @@ define half @v_maximumnum_f16_nan_no_ieee(half %x, half %y) #0 {
}
define float @v_maximumnum_f32_no_ieee(float %x, float %y) #0 {
-; GFX7-LABEL: v_maximumnum_f32_no_ieee:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f32_no_ieee:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_f32_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f32_no_ieee:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_f32_no_ieee:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_f32_no_ieee:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f32_no_ieee:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f32_no_ieee:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f32_no_ieee:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f32_no_ieee:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_f32_no_ieee:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_f32_no_ieee:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f32_no_ieee:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f32_no_ieee:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_f32_no_ieee:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_f32_no_ieee:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_f32_no_ieee:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_f32_no_ieee:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.maximumnum.f32(float %x, float %y)
ret float %result
}
@@ -4920,59 +8472,113 @@ define float @v_maximumnum_f32_nnan_no_ieee(float %x, float %y) #0 {
}
define double @v_maximumnum_f64_no_ieee(double %x, double %y) #0 {
-; GFX7-LABEL: v_maximumnum_f64_no_ieee:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f64_no_ieee:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_f64_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f64_no_ieee:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_f64_no_ieee:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_f64_no_ieee:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f64_no_ieee:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f64_no_ieee:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f64_no_ieee:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f64_no_ieee:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_f64_no_ieee:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_f64_no_ieee:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX9-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f64_no_ieee:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f64_no_ieee:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_f64_no_ieee:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_f64_no_ieee:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_f64_no_ieee:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_f64_no_ieee:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.maximumnum.f64(double %x, double %y)
ret double %result
}
@@ -5022,106 +8628,199 @@ define double @v_maximumnum_f64_nnan_no_ieee(double %x, double %y) #0 {
}
define <2 x half> @v_maximumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 {
-; GFX7-LABEL: v_maximumnum_v2f16_no_ieee:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v2f16_no_ieee:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_max_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_maximumnum_v2f16_no_ieee:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_maximumnum_v2f16_no_ieee:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_v2f16_no_ieee:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_maximumnum_v2f16_no_ieee:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximumnum_v2f16_no_ieee:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v3, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v2, v3
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-GISEL-NEXT: s_nop 0
+; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v1
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> %x, <2 x half> %y)
ret <2 x half> %result
}
define <2 x half> @v_maximumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y) #0 {
-; GFX7-LABEL: v_maximumnum_v2f16_nnan_no_ieee:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v2f16_nnan_no_ieee:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_v2f16_nnan_no_ieee:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v2f16_nnan_no_ieee:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v2f16_nnan_no_ieee:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v2f16_nnan_no_ieee:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v0, v1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_v2f16_nnan_no_ieee:
; GFX9: ; %bb.0:
@@ -5155,34 +8854,60 @@ define <2 x half> @v_maximumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y)
}
define <3 x half> @v_maximumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y) #0 {
-; GFX7-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v2
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v3
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v5
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v3
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v2
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v3
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
; GFX9: ; %bb.0:
@@ -5220,41 +8945,73 @@ define <3 x half> @v_maximumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y)
}
define <4 x half> @v_maximumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y) #0 {
-; GFX7-LABEL: v_maximumnum_v4f16_nnan_no_ieee:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
-; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
-; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_v4f16_nnan_no_ieee:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v3
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v2
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_v4f16_nnan_no_ieee:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX7-SDAG-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_v4f16_nnan_no_ieee:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v6
+; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v7
+; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: v_max_f32_e32 v3, v3, v5
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_v4f16_nnan_no_ieee:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v3
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v5
+; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v4
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_v4f16_nnan_no_ieee:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v2
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v1, v3
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_v4f16_nnan_no_ieee:
; GFX9: ; %bb.0:
@@ -5292,3 +9049,6 @@ define <4 x half> @v_maximumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y)
}
attributes #0 = { "amdgpu-ieee"="false" }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX900: {{.*}}
+; GFX950: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/minimumnum.ll b/llvm/test/CodeGen/AMDGPU/minimumnum.ll
index b12385d19c617..5cb051d2ab857 100644
--- a/llvm/test/CodeGen/AMDGPU/minimumnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/minimumnum.ll
@@ -1,106 +1,209 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-SDAG,GFX900-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-GISEL,GFX900-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-SDAG,GFX950-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-GISEL,GFX950-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16,GFX11-SDAG,GFX11-TRUE16-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16,GFX11-GISEL,GFX11-TRUE16-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-SDAG,GFX11-FAKE16-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-GISEL,GFX11-FAKE16-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-SDAG,GFX12-TRUE16-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-GISEL,GFX12-TRUE16-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16,GFX12-SDAG,GFX12-FAKE16-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16,GFX12-GISEL,GFX12-FAKE16-GISEL %s
define half @v_minimumnum_f16(half %x, half %y) {
-; GFX7-LABEL: v_minimumnum_f16:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX10-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-TRUE16-LABEL: v_minimumnum_f16:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_minimumnum_f16:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-TRUE16-LABEL: v_minimumnum_f16:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
-; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-FAKE16-LABEL: v_minimumnum_f16:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
-; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_minimumnum_f16:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_f16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16:
+; GFX12-TRUE16-SDAG: ; %bb.0:
+; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16:
+; GFX12-TRUE16-GISEL: ; %bb.0:
+; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16:
+; GFX12-FAKE16-SDAG: ; %bb.0:
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16:
+; GFX12-FAKE16-GISEL: ; %bb.0:
+; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.minimumnum.f16(half %x, half %y)
ret half %result
}
define half @v_minimumnum_f16_nnan(half %x, half %y) {
-; GFX7-LABEL: v_minimumnum_f16_nnan:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f16_nnan:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f16_nnan:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f16_nnan:
; GFX8: ; %bb.0:
@@ -156,13 +259,22 @@ define half @v_minimumnum_f16_nnan(half %x, half %y) {
}
define half @v_minimumnum_f16_1.0(half %x) {
-; GFX7-LABEL: v_minimumnum_f16_1.0:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_min_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f16_1.0:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f16_1.0:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, 1.0
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f16_1.0:
; GFX8: ; %bb.0:
@@ -229,57 +341,109 @@ define half @v_minimumnum_f16_1.0(half %x) {
}
define float @v_minimumnum_f32(float %x, float %y) {
-; GFX7-LABEL: v_minimumnum_f32:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_f32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_f32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_minimumnum_f32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_f32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_f32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_f32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_f32:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_f32:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.minimumnum.f32(float %x, float %y)
ret float %result
}
@@ -329,59 +493,113 @@ define float @v_minimumnum_f32_nnan(float %x, float %y) {
}
define double @v_minimumnum_f64(double %x, double %y) {
-; GFX7-LABEL: v_minimumnum_f64:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f64:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_f64:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f64:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f64:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f64:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f64:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_minimumnum_f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX9-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX9-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX9-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_f64:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_f64:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.minimumnum.f64(double %x, double %y)
ret double %result
}
@@ -707,432 +925,820 @@ define double @v_minimumnum_f64_1.0(double %x) {
}
define half @v_minimumnum_f16_v_s(half %x, half inreg %y) {
-; GFX7-LABEL: v_minimumnum_f16_v_s:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s16
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f16_v_s:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_minimumnum_f16_v_s:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX900-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX900-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_f16_v_s:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f16_e64 v1, s0, s0
-; GFX950-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX950-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f16_v_s:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX10-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-TRUE16-LABEL: v_minimumnum_f16_v_s:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, s0, s0
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_minimumnum_f16_v_s:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, s0, s0
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-TRUE16-LABEL: v_minimumnum_f16_v_s:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, s0, s0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
-; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-FAKE16-LABEL: v_minimumnum_f16_v_s:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, s0, s0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
-; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f16_v_s:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, s16
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f16_v_s:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, s16
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f16_v_s:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f16_v_s:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_f16_v_s:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f16_v_s:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f16_v_s:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f16_v_s:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f16_v_s:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f16_v_s:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_v_s:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, s0, s0
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_v_s:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, s0, s0
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_v_s:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_v_s:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_v_s:
+; GFX12-TRUE16-SDAG: ; %bb.0:
+; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_v_s:
+; GFX12-TRUE16-GISEL: ; %bb.0:
+; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s0, s0
+; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_v_s:
+; GFX12-FAKE16-SDAG: ; %bb.0:
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_v_s:
+; GFX12-FAKE16-GISEL: ; %bb.0:
+; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s0, s0
+; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.minimumnum.f16(half %x, half %y)
ret half %result
}
define half @v_minimumnum_f16_s_s(half inreg %x, half inreg %y) {
-; GFX7-LABEL: v_minimumnum_f16_s_s:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, s16
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s17
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f16_s_s:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_e64 v0, s17, s17
-; GFX8-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX8-NEXT: v_min_f16_e32 v0, v1, v0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f16_s_s:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, s16
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, s17
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f16_s_s:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, s16
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, s17
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f16_s_s:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_e64 v0, s17, s17
+; GFX8-SDAG-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v1, v0
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f16_s_s:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e64 v0, s16, s16
+; GFX8-GISEL-NEXT: v_max_f16_e64 v1, s17, s17
+; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_f16_s_s:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v0, s17, s17
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v1, v0
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f16_s_s:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e64 v0, s16, s16
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, s17, s17
+; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f16_s_s:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v0, s1, s1
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v1, v0
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f16_s_s:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e64 v0, s0, s0
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, s1, s1
+; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f16_s_s:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f16_e64 v0, s17, s17
+; GFX10-SDAG-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v1, v0
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f16_s_s:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f16_e64 v0, s16, s16
+; GFX10-GISEL-NEXT: v_max_f16_e64 v1, s17, s17
+; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_s_s:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.l, s1, s1
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, s0, s0
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.h, v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_s_s:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.l, s0, s0
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, s1, s1
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_s_s:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v0, s1, s1
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v1, v0
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_s_s:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v0, s0, s0
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, s1, s1
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_s_s:
+; GFX12-TRUE16-SDAG: ; %bb.0:
+; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, s1, s1
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0
+; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.h, v0.l
+; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_s_s:
+; GFX12-TRUE16-GISEL: ; %bb.0:
+; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, s0, s0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s1, s1
+; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_s_s:
+; GFX12-FAKE16-SDAG: ; %bb.0:
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, s1, s1
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0
+; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v1, v0
+; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_s_s:
+; GFX12-FAKE16-GISEL: ; %bb.0:
+; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, s0, s0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s1, s1
+; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %result = call half @llvm.minimumnum.f16(half %x, half %y)
+ ret half %result
+}
+
+define float @v_minimumnum_f32_s_v(float inreg %x, float %y) {
+; GFX7-SDAG-LABEL: v_minimumnum_f32_s_v:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f32_s_v:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f32_s_v:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f32_s_v:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_f32_s_v:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f32_s_v:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f32_s_v:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f32_s_v:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f32_s_v:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f32_s_v:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_f32_s_v:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX11-SDAG-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_f32_s_v:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_f32_s_v:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v1, v0
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_f32_s_v:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v1, v0
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %result = call float @llvm.minimumnum.f32(float %x, float %y)
+ ret float %result
+}
+
+define float @v_minimumnum_f32_v_s(float %x, float inreg %y) {
+; GFX7-SDAG-LABEL: v_minimumnum_f32_v_s:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f32_v_s:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f32_v_s:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f32_v_s:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_f32_v_s:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f32_v_s:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f32_v_s:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f32_v_s:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f32_v_s:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f32_v_s:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_f32_v_s:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_f32_v_s:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX11-GISEL-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_f32_v_s:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_f32_v_s:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %result = call float @llvm.minimumnum.f32(float %x, float %y)
+ ret float %result
+}
+
+define float @v_minimumnum_f32_s_s(float inreg %x, float inreg %y) {
+; GFX7-SDAG-LABEL: v_minimumnum_f32_s_s:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, s17
+; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f32_s_s:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, s16
+; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s17
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f32_s_s:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, s17
+; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f32_s_s:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, s16
+; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s17
+; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_f32_s_s:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v0, s17, s17
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f32_s_s:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e64 v0, s16, s16
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, s17, s17
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f32_s_s:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v0, s1, s1
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f32_s_s:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e64 v0, s0, s0
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, s1, s1
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f32_s_s:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e64 v0, s17, s17
+; GFX10-SDAG-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f32_s_s:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e64 v0, s16, s16
+; GFX10-GISEL-NEXT: v_max_f32_e64 v1, s17, s17
+; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_f32_s_s:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f32_e64 v0, s1, s1
+; GFX11-SDAG-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_f32_s_s:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f32_e64 v0, s0, s0
+; GFX11-GISEL-NEXT: v_max_f32_e64 v1, s1, s1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_f32_s_s:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v0, s1, s1
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v1, v0
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_f32_s_s:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v0, s0, s0
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, s1, s1
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %result = call float @llvm.minimumnum.f32(float %x, float %y)
+ ret float %result
+}
+
+define double @v_minimumnum_f64_s_v(double inreg %x, double %y) {
+; GFX7-SDAG-LABEL: v_minimumnum_f64_s_v:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f64_s_v:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f64_s_v:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f64_s_v:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_f64_s_v:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f64_s_v:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f64_s_v:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f64_s_v:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_minimumnum_f16_s_s:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f16_e64 v0, s17, s17
-; GFX900-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX900-NEXT: v_min_f16_e32 v0, v1, v0
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_f16_s_s:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f16_e64 v0, s1, s1
-; GFX950-NEXT: v_max_f16_e64 v1, s0, s0
-; GFX950-NEXT: v_min_f16_e32 v0, v1, v0
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f16_s_s:
+; GFX10-LABEL: v_minimumnum_f64_s_v:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e64 v0, s17, s17
-; GFX10-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX10-NEXT: v_min_f16_e32 v0, v1, v0
+; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-TRUE16-LABEL: v_minimumnum_f16_s_s:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, s1, s1
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, s0, s0
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.h, v0.l
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_minimumnum_f16_s_s:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v0, s1, s1
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, s0, s0
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v1, v0
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-TRUE16-LABEL: v_minimumnum_f16_s_s:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.l, s1, s1
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, s0, s0
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.h, v0.l
-; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-FAKE16-LABEL: v_minimumnum_f16_s_s:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v0, s1, s1
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, s0, s0
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v1, v0
-; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
- %result = call half @llvm.minimumnum.f16(half %x, half %y)
- ret half %result
-}
-
-define float @v_minimumnum_f32_s_v(float inreg %x, float %y) {
-; GFX7-LABEL: v_minimumnum_f32_s_v:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16
-; GFX7-NEXT: v_min_f32_e32 v0, v1, v0
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f32_s_v:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16
-; GFX8-NEXT: v_min_f32_e32 v0, v1, v0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_minimumnum_f32_s_v:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX900-NEXT: v_max_f32_e64 v1, s16, s16
-; GFX900-NEXT: v_min_f32_e32 v0, v1, v0
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_f32_s_v:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX950-NEXT: v_max_f32_e64 v1, s0, s0
-; GFX950-NEXT: v_min_f32_e32 v0, v1, v0
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f32_s_v:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f32_e64 v1, s16, s16
-; GFX10-NEXT: v_min_f32_e32 v0, v1, v0
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_f32_s_v:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX11-NEXT: v_max_f32_e64 v1, s0, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_min_f32_e32 v0, v1, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_f32_s_v:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
-; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_min_num_f32_e32 v0, v1, v0
-; GFX12-NEXT: s_setpc_b64 s[30:31]
- %result = call float @llvm.minimumnum.f32(float %x, float %y)
- ret float %result
-}
-
-define float @v_minimumnum_f32_v_s(float %x, float inreg %y) {
-; GFX7-LABEL: v_minimumnum_f32_v_s:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f32_v_s:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_minimumnum_f32_v_s:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f32_e64 v1, s16, s16
-; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX900-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_f32_v_s:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f32_e64 v1, s0, s0
-; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX950-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f32_v_s:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e64 v1, s16, s16
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_f32_v_s:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f32_e64 v1, s0, s0
-; GFX11-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_f32_v_s:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0
-; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
- %result = call float @llvm.minimumnum.f32(float %x, float %y)
- ret float %result
-}
-
-define float @v_minimumnum_f32_s_s(float inreg %x, float inreg %y) {
-; GFX7-LABEL: v_minimumnum_f32_s_s:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, s17
-; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16
-; GFX7-NEXT: v_min_f32_e32 v0, v1, v0
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f32_s_s:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e64 v0, 1.0, s17
-; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16
-; GFX8-NEXT: v_min_f32_e32 v0, v1, v0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_minimumnum_f32_s_s:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f32_e64 v0, s17, s17
-; GFX900-NEXT: v_max_f32_e64 v1, s16, s16
-; GFX900-NEXT: v_min_f32_e32 v0, v1, v0
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_f32_s_s:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f32_e64 v0, s1, s1
-; GFX950-NEXT: v_max_f32_e64 v1, s0, s0
-; GFX950-NEXT: v_min_f32_e32 v0, v1, v0
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f32_s_s:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e64 v0, s17, s17
-; GFX10-NEXT: v_max_f32_e64 v1, s16, s16
-; GFX10-NEXT: v_min_f32_e32 v0, v1, v0
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_f32_s_s:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f32_e64 v0, s1, s1
-; GFX11-NEXT: v_max_f32_e64 v1, s0, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_min_f32_e32 v0, v1, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_f32_s_s:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f32_e64 v0, s1, s1
-; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_min_num_f32_e32 v0, v1, v0
-; GFX12-NEXT: s_setpc_b64 s[30:31]
- %result = call float @llvm.minimumnum.f32(float %x, float %y)
- ret float %result
-}
-
-define double @v_minimumnum_f64_s_v(double inreg %x, double %y) {
-; GFX7-LABEL: v_minimumnum_f64_s_v:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX7-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f64_s_v:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX8-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_minimumnum_f64_s_v:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX900-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_f64_s_v:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
-; GFX950-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f64_s_v:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX10-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_f64_s_v:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
-; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_minimumnum_f64_s_v:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f64_s_v:
; GFX12: ; %bb.0:
@@ -1151,37 +1757,69 @@ define double @v_minimumnum_f64_s_v(double inreg %x, double %y) {
}
define double @v_minimumnum_f64_v_s(double %x, double inreg %y) {
-; GFX7-LABEL: v_minimumnum_f64_v_s:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f64_v_s:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_minimumnum_f64_v_s:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_f64_v_s:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
-; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX950-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f64_v_s:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f64_v_s:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f64_v_s:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f64_v_s:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_f64_v_s:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f64_v_s:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f64_v_s:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f64_v_s:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_v_s:
; GFX10: ; %bb.0:
@@ -1217,184 +1855,354 @@ define double @v_minimumnum_f64_v_s(double %x, double inreg %y) {
}
define double @v_minimumnum_f64_s_s(double inreg %x, double inreg %y) {
-; GFX7-LABEL: v_minimumnum_f64_s_s:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
-; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX7-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f64_s_s:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
-; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX8-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_minimumnum_f64_s_s:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
-; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX900-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_f64_s_s:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
-; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
-; GFX950-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f64_s_s:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
-; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX10-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_f64_s_s:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
-; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_f64_s_s:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f64_e64 v[0:1], s[2:3], s[2:3]
-; GFX12-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[2:3], v[0:1]
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f64_s_s:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f64_s_s:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19]
+; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f64_s_s:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f64_s_s:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19]
+; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_f64_s_s:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f64_s_s:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19]
+; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f64_s_s:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f64_s_s:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], s[0:1], s[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], s[2:3], s[2:3]
+; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f64_s_s:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
+; GFX10-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f64_s_s:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17]
+; GFX10-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19]
+; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_f64_s_s:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
+; GFX11-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_f64_s_s:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], s[0:1], s[0:1]
+; GFX11-GISEL-NEXT: v_max_f64 v[2:3], s[2:3], s[2:3]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_f64_s_s:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[0:1], s[2:3], s[2:3]
+; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1]
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[2:3], v[0:1]
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_f64_s_s:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[0:1], s[0:1], s[0:1]
+; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[2:3], s[2:3], s[2:3]
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.minimumnum.f64(double %x, double %y)
ret double %result
}
define float @v_minimumnum_f32_fabs_rhs(float %x, float %y) {
-; GFX7-LABEL: v_minimumnum_f32_fabs_rhs:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f32_fabs_rhs:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_f32_fabs_rhs:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f32_fabs_rhs:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_f32_fabs_rhs:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX11-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_f32_fabs_rhs:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
-; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX11-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
+; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%fabs.y = call float @llvm.fabs.f32(float %y)
%result = call float @llvm.minimumnum.f32(float %x, float %fabs.y)
ret float %result
}
define float @v_minimumnum_f32_fneg_fabs_rhs(float %x, float %y) {
-; GFX7-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e64 v1, -1.0, |v1|
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e64 v1, -1.0, |v1|
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
-; GFX11-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1|
-; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, -1.0, |v1|
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, -1.0, |v1|
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, -1.0, |v1|
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, -1.0, |v1|
+; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1|
+; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1|
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%fabs.y = call float @llvm.fabs.f32(float %y)
%fneg.fabs.y = fneg float %fabs.y
%result = call float @llvm.minimumnum.f32(float %x, float %fneg.fabs.y)
@@ -1402,59 +2210,113 @@ define float @v_minimumnum_f32_fneg_fabs_rhs(float %x, float %y) {
}
define float @v_minimumnum_f32_fabs(float %x, float %y) {
-; GFX7-LABEL: v_minimumnum_f32_fabs:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
-; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f32_fabs:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
-; GFX8-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
-; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_f32_fabs:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-NEXT: v_max_f32_e64 v0, |v0|, |v0|
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f32_fabs:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX10-NEXT: v_max_f32_e64 v0, |v0|, |v0|
-; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_f32_fabs:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX11-NEXT: v_max_f32_e64 v0, |v0|, |v0|
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_f32_fabs:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
-; GFX12-NEXT: v_max_num_f32_e64 v0, |v0|, |v0|
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f32_fabs:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX7-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f32_fabs:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
+; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f32_fabs:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX8-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
+; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f32_fabs:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
+; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_minimumnum_f32_fabs:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX9-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_f32_fabs:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f32_fabs:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX10-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f32_fabs:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX10-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_f32_fabs:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX11-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_f32_fabs:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX11-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_f32_fabs:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v0, |v0|, |v0|
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_f32_fabs:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v0, |v0|, |v0|
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
%fabs.y = call float @llvm.fabs.f32(float %y)
%result = call float @llvm.minimumnum.f32(float %fabs.x, float %fabs.y)
@@ -1462,59 +2324,113 @@ define float @v_minimumnum_f32_fabs(float %x, float %y) {
}
define float @v_minimumnum_f32_fneg(float %x, float %y) {
-; GFX7-LABEL: v_minimumnum_f32_fneg:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v1, -1.0, v1
-; GFX7-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f32_fneg:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e32 v1, -1.0, v1
-; GFX8-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_f32_fneg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v1, -v1, -v1
-; GFX9-NEXT: v_max_f32_e64 v0, -v0, -v0
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f32_fneg:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e64 v1, -v1, -v1
-; GFX10-NEXT: v_max_f32_e64 v0, -v0, -v0
-; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_f32_fneg:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f32_e64 v1, -v1, -v1
-; GFX11-NEXT: v_max_f32_e64 v0, -v0, -v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_f32_fneg:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f32_e64 v1, -v1, -v1
-; GFX12-NEXT: v_max_num_f32_e64 v0, -v0, -v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f32_fneg:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, -1.0, v1
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f32_fneg:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, -1.0, v1
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f32_fneg:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, -1.0, v1
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f32_fneg:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, -1.0, v1
+; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_minimumnum_f32_fneg:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX9-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_f32_fneg:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f32_fneg:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX10-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f32_fneg:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX10-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_f32_fneg:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX11-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_f32_fneg:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_f32_fneg:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, -v1, -v1
+; GFX12-SDAG-NEXT: v_max_num_f32_e64 v0, -v0, -v0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_f32_fneg:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v0, -v0, -v0
+; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, -v1, -v1
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%fneg.x = fneg float %x
%fneg.y = fneg float %y
%result = call float @llvm.minimumnum.f32(float %fneg.x, float %fneg.y)
@@ -1522,166 +2438,320 @@ define float @v_minimumnum_f32_fneg(float %x, float %y) {
}
define half @v_minimumnum_f16_fabs_rhs(half %x, half %y) {
-; GFX7-LABEL: v_minimumnum_f16_fabs_rhs:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e64 v1, |v1|
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f16_fabs_rhs:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_f16_fabs_rhs:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f16_fabs_rhs:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX10-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-TRUE16-LABEL: v_minimumnum_f16_fabs_rhs:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l|
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_minimumnum_f16_fabs_rhs:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-TRUE16-LABEL: v_minimumnum_f16_fabs_rhs:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
-; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-FAKE16-LABEL: v_minimumnum_f16_fabs_rhs:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
-; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1|
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, |v1|
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX12-TRUE16-SDAG: ; %bb.0:
+; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX12-TRUE16-GISEL: ; %bb.0:
+; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX12-FAKE16-SDAG: ; %bb.0:
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX12-FAKE16-GISEL: ; %bb.0:
+; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
+; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%fabs.y = call half @llvm.fabs.f16(half %y)
%result = call half @llvm.minimumnum.f16(half %x, half %fabs.y)
ret half %result
}
define half @v_minimumnum_f16_fneg_fabs_rhs(half %x, half %y) {
-; GFX7-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e64 v1, -|v1|
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX10-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-TRUE16-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, -|v1.l|, -|v1.l|
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-TRUE16-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l|
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
-; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-FAKE16-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1|
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
-; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -|v1|
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, -|v1|
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, -|v1.l|, -|v1.l|
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, -|v1.l|, -|v1.l|
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX12-TRUE16-SDAG: ; %bb.0:
+; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l|
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX12-TRUE16-GISEL: ; %bb.0:
+; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l|
+; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX12-FAKE16-SDAG: ; %bb.0:
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1|
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX12-FAKE16-GISEL: ; %bb.0:
+; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1|
+; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%fabs.y = call half @llvm.fabs.f16(half %y)
%fneg.fabs.y = fneg half %fabs.y
%result = call half @llvm.minimumnum.f16(half %x, half %fneg.fabs.y)
@@ -1689,83 +2759,160 @@ define half @v_minimumnum_f16_fneg_fabs_rhs(half %x, half %y) {
}
define half @v_minimumnum_f16_fabs(half %x, half %y) {
-; GFX7-LABEL: v_minimumnum_f16_fabs:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e64 v0, |v0|
-; GFX7-NEXT: v_cvt_f32_f16_e64 v1, |v1|
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f16_fabs:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX8-NEXT: v_max_f16_e64 v0, |v0|, |v0|
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_f16_fabs:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-NEXT: v_max_f16_e64 v0, |v0|, |v0|
-; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f16_fabs:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX10-NEXT: v_max_f16_e64 v0, |v0|, |v0|
-; GFX10-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-TRUE16-LABEL: v_minimumnum_f16_fabs:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l|
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, |v0.l|, |v0.l|
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_minimumnum_f16_fabs:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v0, |v0|, |v0|
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-TRUE16-LABEL: v_minimumnum_f16_fabs:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l|
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
-; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-FAKE16-LABEL: v_minimumnum_f16_fabs:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v0, |v0|, |v0|
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
-; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f16_fabs:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1|
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f16_fabs:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0|
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, |v1|
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f16_fabs:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX8-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f16_fabs:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX8-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_minimumnum_f16_fabs:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX9-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_f16_fabs:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f16_fabs:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX10-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f16_fabs:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX10-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.l, |v0.l|, |v0.l|
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_fabs:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.l, |v0.l|, |v0.l|
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_fabs:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_fabs:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs:
+; GFX12-TRUE16-SDAG: ; %bb.0:
+; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l|
+; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_fabs:
+; GFX12-TRUE16-GISEL: ; %bb.0:
+; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l|
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_fabs:
+; GFX12-FAKE16-SDAG: ; %bb.0:
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, |v0|, |v0|
+; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_fabs:
+; GFX12-FAKE16-GISEL: ; %bb.0:
+; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, |v0|, |v0|
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
+; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call half @llvm.fabs.f16(half %x)
%fabs.y = call half @llvm.fabs.f16(half %y)
%result = call half @llvm.minimumnum.f16(half %fabs.x, half %fabs.y)
@@ -1773,83 +2920,160 @@ define half @v_minimumnum_f16_fabs(half %x, half %y) {
}
define half @v_minimumnum_f16_fneg(half %x, half %y) {
-; GFX7-LABEL: v_minimumnum_f16_fneg:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -v0
-; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f16_fneg:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX8-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_f16_fneg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX9-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f16_fneg:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX10-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX10-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-TRUE16-LABEL: v_minimumnum_f16_fneg:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, -v1.l, -v1.l
-; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_minimumnum_f16_fneg:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX11-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-TRUE16-LABEL: v_minimumnum_f16_fneg:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l
-; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
-; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-FAKE16-LABEL: v_minimumnum_f16_fneg:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, -v1, -v1
-; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v0, -v0, -v0
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
-; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f16_fneg:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e64 v1, -v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f16_fneg:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, -v1
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f16_fneg:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX8-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f16_fneg:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX8-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_minimumnum_f16_fneg:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX9-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_f16_fneg:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f16_fneg:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX10-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f16_fneg:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX10-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, -v1.l, -v1.l
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_fneg:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, -v1.l, -v1.l
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_fneg:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_fneg:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg:
+; GFX12-TRUE16-SDAG: ; %bb.0:
+; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l
+; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_fneg:
+; GFX12-TRUE16-GISEL: ; %bb.0:
+; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l
+; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_fneg:
+; GFX12-FAKE16-SDAG: ; %bb.0:
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, -v1, -v1
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, -v0, -v0
+; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_fneg:
+; GFX12-FAKE16-GISEL: ; %bb.0:
+; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, -v0, -v0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, -v1, -v1
+; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%fneg.x = fneg half %x
%fneg.y = fneg half %y
%result = call half @llvm.minimumnum.f16(half %fneg.x, half %fneg.y)
@@ -1857,166 +3081,313 @@ define half @v_minimumnum_f16_fneg(half %x, half %y) {
}
define double @v_minimumnum_f64_fneg(double %x, double %y) {
-; GFX7-LABEL: v_minimumnum_f64_fneg:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
-; GFX7-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
-; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f64_fneg:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
-; GFX8-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
-; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_f64_fneg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
-; GFX9-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f64_fneg:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
-; GFX10-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
-; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_f64_fneg:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
-; GFX11-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_f64_fneg:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f64_e64 v[2:3], -v[2:3], -v[2:3]
-; GFX12-NEXT: v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f64_fneg:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f64_fneg:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f64_fneg:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f64_fneg:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_minimumnum_f64_fneg:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX9-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX9-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_f64_fneg:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX9-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX9-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f64_fneg:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f64_fneg:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX10-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_f64_fneg:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_f64_fneg:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX11-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_f64_fneg:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[2:3], -v[2:3], -v[2:3]
+; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1]
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_f64_fneg:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1]
+; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[2:3], -v[2:3], -v[2:3]
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%fneg.x = fneg double %x
%fneg.y = fneg double %y
%result = call double @llvm.minimumnum.f64(double %fneg.x, double %fneg.y)
ret double %result
-}
-
-define <2 x half> @v_minimumnum_v2f16(<2 x half> %x, <2 x half> %y) {
-; GFX7-LABEL: v_minimumnum_v2f16:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v2f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_minimumnum_v2f16:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX900-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_v2f16:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_v2f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX10-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_v2f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_v2f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+}
+
+define <2 x half> @v_minimumnum_v2f16(<2 x half> %x, <2 x half> %y) {
+; GFX7-SDAG-LABEL: v_minimumnum_v2f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v2f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v2f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v2f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v3, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v2, v3
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_v2f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v2f16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v2f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v2f16:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-GISEL-NEXT: s_nop 0
+; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_v2f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_v2f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_v2f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_v2f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_v2f16:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v1
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_v2f16:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> %x, <2 x half> %y)
ret <2 x half> %result
}
define <2 x half> @v_minimumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) {
-; GFX7-LABEL: v_minimumnum_v2f16_nnan:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v2f16_nnan:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_v2f16_nnan:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v2f16_nnan:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v2f16_nnan:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v2f16_nnan:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v0, v1
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v2f16_nnan:
; GFX9: ; %bb.0:
@@ -2050,135 +3421,243 @@ define <2 x half> @v_minimumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) {
}
define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) {
-; GFX7-LABEL: v_minimumnum_v3f16:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX7-NEXT: v_min_f32_e32 v2, v2, v5
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v3f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v4, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v5, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_min_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v2
-; GFX8-NEXT: v_min_f16_e32 v1, v1, v3
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_minimumnum_v3f16:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX900-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX900-NEXT: v_pk_max_f16 v2, v3, v3
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX900-NEXT: v_pk_min_f16 v1, v1, v2
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_v3f16:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX950-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX950-NEXT: v_pk_max_f16 v2, v3, v3
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_pk_min_f16 v1, v1, v2
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_v3f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX10-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX10-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_v3f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_v3f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
-; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2
-; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_v3f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v3f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v5
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v3f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v3
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v3f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v5, v2, v2
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v3, v3, v3
+; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v4, v5
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_min_f16_e32 v1, v1, v3
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_v3f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v3, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_v3f16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX9-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX9-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v3f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v3, v3
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_v3f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX10-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_v3f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX10-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_v3f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX11-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_v3f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_v3f16:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v2
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v3
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_v3f16:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v2
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v3
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <3 x half> @llvm.minimumnum.v3f16(<3 x half> %x, <3 x half> %y)
ret <3 x half> %result
}
define <3 x half> @v_minimumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
-; GFX7-LABEL: v_minimumnum_v3f16_nnan:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX7-NEXT: v_min_f32_e32 v2, v2, v5
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v3f16_nnan:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v2
-; GFX8-NEXT: v_min_f16_e32 v1, v1, v3
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_v3f16_nnan:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v3f16_nnan:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v5
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v3f16_nnan:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v3
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v3f16_nnan:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v0, v2
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v1, v1, v3
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v3f16_nnan:
; GFX9: ; %bb.0:
@@ -2216,151 +3695,273 @@ define <3 x half> @v_minimumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
}
define <4 x half> @v_minimumnum_v4f16(<4 x half> %x, <4 x half> %y) {
-; GFX7-LABEL: v_minimumnum_v4f16:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
-; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
-; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v4f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v4, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v5, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v5, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v6, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_min_f16_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_min_f16_e32 v1, v1, v3
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v2
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_minimumnum_v4f16:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX900-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX900-NEXT: v_pk_max_f16 v2, v3, v3
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX900-NEXT: v_pk_min_f16 v1, v1, v2
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_v4f16:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX950-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX950-NEXT: v_pk_max_f16 v2, v3, v3
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_pk_min_f16 v1, v1, v2
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_v4f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX10-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX10-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_v4f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_v4f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
-; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2
-; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_v4f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v4f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v6
+; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v7
+; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v5
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v4f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v6, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v3
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v5
+; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v4
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v4f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v5, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v6, v2, v2
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v7, v3, v3
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v4, v6
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v5, v7
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_v4f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v3, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_v4f16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX9-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX9-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v4f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v3, v3
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_v4f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX10-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_v4f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX10-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_v4f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX11-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_v4f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_v4f16:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v2
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v3
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_v4f16:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v2
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v3
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> %x, <4 x half> %y)
ret <4 x half> %result
}
define <4 x half> @v_minimumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) {
-; GFX7-LABEL: v_minimumnum_v4f16_nnan:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
-; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
-; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v4f16_nnan:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_e32 v1, v1, v3
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v2
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_v4f16_nnan:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v4f16_nnan:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v6
+; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v7
+; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v5
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v4f16_nnan:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v3
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v5
+; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v4
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v4f16_nnan:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v0, v2
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v1, v3
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v4f16_nnan:
; GFX9: ; %bb.0:
@@ -2398,1349 +3999,2545 @@ define <4 x half> @v_minimumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) {
}
define <6 x half> @v_minimumnum_v6f16(<6 x half> %x, <6 x half> %y) {
-; GFX7-LABEL: v_minimumnum_v6f16:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
-; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
-; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v6
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v7
-; GFX7-NEXT: v_min_f32_e32 v2, v2, v8
-; GFX7-NEXT: v_min_f32_e32 v3, v3, v9
-; GFX7-NEXT: v_min_f32_e32 v4, v4, v10
-; GFX7-NEXT: v_min_f32_e32 v5, v5, v11
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v6f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v6, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v7, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v6, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v7, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v8, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v7, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v8, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v9, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v5, v5, v5
-; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
-; GFX8-NEXT: v_max_f16_e32 v4, v4, v4
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_min_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_min_f16_e32 v2, v2, v5
-; GFX8-NEXT: v_min_f16_e32 v1, v1, v4
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v3
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v8
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v7
-; GFX8-NEXT: v_or_b32_e32 v2, v2, v6
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_minimumnum_v6f16:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX900-NEXT: v_pk_min_f16 v0, v0, v3
-; GFX900-NEXT: v_pk_max_f16 v3, v4, v4
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX900-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX900-NEXT: v_pk_max_f16 v3, v5, v5
-; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX900-NEXT: v_pk_min_f16 v2, v2, v3
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_v6f16:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX950-NEXT: v_pk_min_f16 v0, v0, v3
-; GFX950-NEXT: v_pk_max_f16 v3, v4, v4
-; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX950-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX950-NEXT: v_pk_max_f16 v3, v5, v5
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_pk_min_f16 v2, v2, v3
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_v6f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX10-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX10-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX10-NEXT: v_pk_min_f16 v0, v0, v3
-; GFX10-NEXT: v_pk_min_f16 v1, v1, v4
-; GFX10-NEXT: v_pk_min_f16 v2, v2, v5
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_v6f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX11-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX11-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX11-NEXT: v_pk_min_f16 v0, v0, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_pk_min_f16 v1, v1, v4
-; GFX11-NEXT: v_pk_min_f16 v2, v2, v5
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_v6f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
-; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
-; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5
-; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
-; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v3
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v4
-; GFX12-NEXT: v_pk_min_num_f16 v2, v2, v5
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_v6f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v6
+; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v7
+; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v8
+; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v9
+; GFX7-SDAG-NEXT: v_min_f32_e32 v4, v4, v10
+; GFX7-SDAG-NEXT: v_min_f32_e32 v5, v5, v11
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v6f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v6
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v8
+; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v7
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v9
+; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v10
+; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v11
+; GFX7-GISEL-NEXT: v_min_f32_e32 v4, v4, v6
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: v_min_f32_e32 v5, v5, v7
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v6f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v6, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v7, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v6, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v7, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v7, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v9, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5
+; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2
+; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_min_f16_e32 v2, v2, v5
+; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v4
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v3
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v8
+; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v7
+; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v6
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v6f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v6, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v7, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v8, v2, v2
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v3, v3
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v10, v4, v4
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v11, v5, v5
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v6, v6, v9
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v7, v10
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v8, v11
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v6, v0
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v4, v2
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_v6f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v4, v4
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v5, v5
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-SDAG-NEXT: v_pk_min_f16 v2, v2, v3
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_v6f16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX9-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX9-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX9-GISEL-NEXT: v_pk_min_f16 v0, v0, v3
+; GFX9-GISEL-NEXT: v_pk_min_f16 v1, v1, v4
+; GFX9-GISEL-NEXT: v_pk_min_f16 v2, v2, v5
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v6f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v4, v4
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v5, v5
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: v_pk_min_f16 v2, v2, v3
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_v6f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v3
+; GFX10-SDAG-NEXT: v_pk_min_f16 v1, v1, v4
+; GFX10-SDAG-NEXT: v_pk_min_f16 v2, v2, v5
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_v6f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v3
+; GFX10-GISEL-NEXT: v_pk_min_f16 v1, v1, v4
+; GFX10-GISEL-NEXT: v_pk_min_f16 v2, v2, v5
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_v6f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v3
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-NEXT: v_pk_min_f16 v1, v1, v4
+; GFX11-SDAG-NEXT: v_pk_min_f16 v2, v2, v5
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_v6f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v3
+; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v4
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_pk_min_f16 v2, v2, v5
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_v6f16:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v3
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v4
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v5
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_v6f16:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v3
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v4
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v5
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <6 x half> @llvm.minimumnum.v6f16(<6 x half> %x, <6 x half> %y)
ret <6 x half> %result
}
define <8 x half> @v_minimumnum_v8f16(<8 x half> %x, <8 x half> %y) {
-; GFX7-LABEL: v_minimumnum_v8f16:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
-; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
-; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
-; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
-; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
-; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v8
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v9
-; GFX7-NEXT: v_min_f32_e32 v2, v2, v10
-; GFX7-NEXT: v_min_f32_e32 v3, v3, v11
-; GFX7-NEXT: v_min_f32_e32 v4, v4, v12
-; GFX7-NEXT: v_min_f32_e32 v5, v5, v13
-; GFX7-NEXT: v_min_f32_e32 v6, v6, v14
-; GFX7-NEXT: v_min_f32_e32 v7, v7, v15
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v8f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v8, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v9, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v9, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v10, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v10, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v11, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v10, v11, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v11, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v12, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v7, v7, v7
-; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
-; GFX8-NEXT: v_max_f16_e32 v6, v6, v6
-; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
-; GFX8-NEXT: v_max_f16_e32 v5, v5, v5
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_e32 v4, v4, v4
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_min_f16_sdwa v11, v12, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_min_f16_e32 v3, v3, v7
-; GFX8-NEXT: v_min_f16_e32 v2, v2, v6
-; GFX8-NEXT: v_min_f16_e32 v1, v1, v5
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v4
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v11
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v10
-; GFX8-NEXT: v_or_b32_e32 v2, v2, v9
-; GFX8-NEXT: v_or_b32_e32 v3, v3, v8
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_minimumnum_v8f16:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX900-NEXT: v_pk_min_f16 v0, v0, v4
-; GFX900-NEXT: v_pk_max_f16 v4, v5, v5
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX900-NEXT: v_pk_min_f16 v1, v1, v4
-; GFX900-NEXT: v_pk_max_f16 v4, v6, v6
-; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX900-NEXT: v_pk_min_f16 v2, v2, v4
-; GFX900-NEXT: v_pk_max_f16 v4, v7, v7
-; GFX900-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX900-NEXT: v_pk_min_f16 v3, v3, v4
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_v8f16:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX950-NEXT: v_pk_min_f16 v0, v0, v4
-; GFX950-NEXT: v_pk_max_f16 v4, v5, v5
-; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX950-NEXT: v_pk_min_f16 v1, v1, v4
-; GFX950-NEXT: v_pk_max_f16 v4, v6, v6
-; GFX950-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX950-NEXT: v_pk_min_f16 v2, v2, v4
-; GFX950-NEXT: v_pk_max_f16 v4, v7, v7
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_pk_min_f16 v3, v3, v4
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_v8f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX10-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX10-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX10-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX10-NEXT: v_pk_min_f16 v0, v0, v4
-; GFX10-NEXT: v_pk_min_f16 v1, v1, v5
-; GFX10-NEXT: v_pk_min_f16 v2, v2, v6
-; GFX10-NEXT: v_pk_min_f16 v3, v3, v7
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_v8f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX11-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX11-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX11-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX11-NEXT: v_pk_min_f16 v0, v0, v4
-; GFX11-NEXT: v_pk_min_f16 v1, v1, v5
-; GFX11-NEXT: v_pk_min_f16 v2, v2, v6
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_pk_min_f16 v3, v3, v7
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_v8f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
-; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
-; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v6
-; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
-; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v7
-; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
-; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v4
-; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v5
-; GFX12-NEXT: v_pk_min_num_f16 v2, v2, v6
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX12-NEXT: v_pk_min_num_f16 v3, v3, v7
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_v8f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v15, v15
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v14, v14
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v13, v13
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v12, v12
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v15, v15
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v14, v14
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v13, v13
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v12, v12
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v8
+; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v9
+; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v10
+; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v11
+; GFX7-SDAG-NEXT: v_min_f32_e32 v4, v4, v12
+; GFX7-SDAG-NEXT: v_min_f32_e32 v5, v5, v13
+; GFX7-SDAG-NEXT: v_min_f32_e32 v6, v6, v14
+; GFX7-SDAG-NEXT: v_min_f32_e32 v7, v7, v15
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v8f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v8
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v10
+; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v9
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v11
+; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v8
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v12
+; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v9
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v13
+; GFX7-GISEL-NEXT: v_min_f32_e32 v4, v4, v8
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v14
+; GFX7-GISEL-NEXT: v_min_f32_e32 v5, v5, v9
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v15
+; GFX7-GISEL-NEXT: v_min_f32_e32 v6, v6, v8
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: v_min_f32_e32 v7, v7, v9
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v8f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v9, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v9, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v10, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v10, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v11, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v10, v11, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v11, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v12, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v7
+; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3
+; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v6
+; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2
+; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v11, v12, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_min_f16_e32 v3, v3, v7
+; GFX8-SDAG-NEXT: v_min_f16_e32 v2, v2, v6
+; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v5
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v4
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v11
+; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v10
+; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v9
+; GFX8-SDAG-NEXT: v_or_b32_e32 v3, v3, v8
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v8f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v8, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v10, v2, v2
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v11, v3, v3
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v12, v4, v4
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v13, v5, v5
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v14, v6, v6
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v15, v7, v7
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v8, v8, v12
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v9, v13
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_min_f16_e32 v5, v10, v14
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_min_f16_e32 v6, v11, v15
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v8, v0
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v4, v1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v5, v2
+; GFX8-GISEL-NEXT: v_or_b32_e32 v3, v6, v3
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_v8f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v4
+; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v5, v5
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v4
+; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v6, v6
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-SDAG-NEXT: v_pk_min_f16 v2, v2, v4
+; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v7, v7
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX900-SDAG-NEXT: v_pk_min_f16 v3, v3, v4
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_v8f16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX9-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX9-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX9-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX9-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX9-GISEL-NEXT: v_pk_min_f16 v0, v0, v4
+; GFX9-GISEL-NEXT: v_pk_min_f16 v1, v1, v5
+; GFX9-GISEL-NEXT: v_pk_min_f16 v2, v2, v6
+; GFX9-GISEL-NEXT: v_pk_min_f16 v3, v3, v7
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v8f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v4
+; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v5, v5
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v4
+; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v6, v6
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX950-SDAG-NEXT: v_pk_min_f16 v2, v2, v4
+; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v7, v7
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: v_pk_min_f16 v3, v3, v4
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_v8f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v4
+; GFX10-SDAG-NEXT: v_pk_min_f16 v1, v1, v5
+; GFX10-SDAG-NEXT: v_pk_min_f16 v2, v2, v6
+; GFX10-SDAG-NEXT: v_pk_min_f16 v3, v3, v7
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_v8f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX10-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v4
+; GFX10-GISEL-NEXT: v_pk_min_f16 v1, v1, v5
+; GFX10-GISEL-NEXT: v_pk_min_f16 v2, v2, v6
+; GFX10-GISEL-NEXT: v_pk_min_f16 v3, v3, v7
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_v8f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v4
+; GFX11-SDAG-NEXT: v_pk_min_f16 v1, v1, v5
+; GFX11-SDAG-NEXT: v_pk_min_f16 v2, v2, v6
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-SDAG-NEXT: v_pk_min_f16 v3, v3, v7
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_v8f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v4
+; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v5
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_pk_min_f16 v2, v2, v6
+; GFX11-GISEL-NEXT: v_pk_min_f16 v3, v3, v7
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_v8f16:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v4
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v5
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v6
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v3, v3, v7
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_v8f16:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v4
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v5
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v6
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v3, v3, v7
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> %x, <8 x half> %y)
ret <8 x half> %result
}
-define <16 x half> @v_minimumnum_v16f16(<16 x half> %x, <16 x half> %y) {
-; GFX7-LABEL: v_minimumnum_v16f16:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v16
-; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v17
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v20
-; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v16
-; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v18
-; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v21
-; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_min_f32_e32 v2, v2, v16
-; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v19
-; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v22
-; GFX7-NEXT: v_cvt_f16_f32_e32 v20, v23
-; GFX7-NEXT: v_min_f32_e32 v4, v4, v17
-; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19
-; GFX7-NEXT: v_min_f32_e32 v5, v5, v18
-; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
-; GFX7-NEXT: v_min_f32_e32 v3, v3, v16
-; GFX7-NEXT: buffer_load_dword v16, off, s[0:3], s32
-; GFX7-NEXT: v_min_f32_e32 v6, v6, v19
-; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v24
-; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
-; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v25
-; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
-; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v26
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_cvt_f32_f16_e32 v20, v20
-; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
-; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
-; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
-; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18
-; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
-; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19
-; GFX7-NEXT: v_min_f32_e32 v7, v7, v20
-; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
-; GFX7-NEXT: v_cvt_f16_f32_e32 v20, v27
-; GFX7-NEXT: v_min_f32_e32 v8, v8, v17
-; GFX7-NEXT: v_min_f32_e32 v9, v9, v18
-; GFX7-NEXT: v_min_f32_e32 v10, v10, v19
-; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12
-; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v28
-; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13
-; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v29
-; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14
-; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v30
-; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15
-; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
-; GFX7-NEXT: v_cvt_f32_f16_e32 v20, v20
-; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12
-; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
-; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13
-; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18
-; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14
-; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19
-; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15
-; GFX7-NEXT: v_min_f32_e32 v11, v11, v20
-; GFX7-NEXT: v_min_f32_e32 v12, v12, v17
-; GFX7-NEXT: v_min_f32_e32 v13, v13, v18
-; GFX7-NEXT: v_min_f32_e32 v14, v14, v19
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16
-; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT: v_min_f32_e32 v15, v15, v16
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v16f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v16, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v17, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v16, v17, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v17, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v18, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v17, v18, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v18, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v19, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v18, v19, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v19, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v20, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v19, v20, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v20, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v21, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v20, v21, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v21, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v22, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v21, v22, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v22, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v23, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v22, v23, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v23, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v24, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v15, v15, v15
-; GFX8-NEXT: v_max_f16_e32 v7, v7, v7
-; GFX8-NEXT: v_max_f16_e32 v14, v14, v14
-; GFX8-NEXT: v_max_f16_e32 v6, v6, v6
-; GFX8-NEXT: v_max_f16_e32 v13, v13, v13
-; GFX8-NEXT: v_max_f16_e32 v5, v5, v5
-; GFX8-NEXT: v_max_f16_e32 v12, v12, v12
-; GFX8-NEXT: v_max_f16_e32 v4, v4, v4
-; GFX8-NEXT: v_max_f16_e32 v11, v11, v11
-; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
-; GFX8-NEXT: v_max_f16_e32 v10, v10, v10
-; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
-; GFX8-NEXT: v_max_f16_e32 v9, v9, v9
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_e32 v8, v8, v8
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_min_f16_sdwa v23, v24, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_min_f16_e32 v7, v7, v15
-; GFX8-NEXT: v_min_f16_e32 v6, v6, v14
-; GFX8-NEXT: v_min_f16_e32 v5, v5, v13
-; GFX8-NEXT: v_min_f16_e32 v4, v4, v12
-; GFX8-NEXT: v_min_f16_e32 v3, v3, v11
-; GFX8-NEXT: v_min_f16_e32 v2, v2, v10
-; GFX8-NEXT: v_min_f16_e32 v1, v1, v9
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v8
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v23
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v22
-; GFX8-NEXT: v_or_b32_e32 v2, v2, v21
-; GFX8-NEXT: v_or_b32_e32 v3, v3, v20
-; GFX8-NEXT: v_or_b32_e32 v4, v4, v19
-; GFX8-NEXT: v_or_b32_e32 v5, v5, v18
-; GFX8-NEXT: v_or_b32_e32 v6, v6, v17
-; GFX8-NEXT: v_or_b32_e32 v7, v7, v16
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_minimumnum_v16f16:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_pk_max_f16 v8, v8, v8
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX900-NEXT: v_pk_min_f16 v0, v0, v8
-; GFX900-NEXT: v_pk_max_f16 v8, v9, v9
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX900-NEXT: v_pk_min_f16 v1, v1, v8
-; GFX900-NEXT: v_pk_max_f16 v8, v10, v10
-; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX900-NEXT: v_pk_min_f16 v2, v2, v8
-; GFX900-NEXT: v_pk_max_f16 v8, v11, v11
-; GFX900-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX900-NEXT: v_pk_min_f16 v3, v3, v8
-; GFX900-NEXT: v_pk_max_f16 v8, v12, v12
-; GFX900-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX900-NEXT: v_pk_min_f16 v4, v4, v8
-; GFX900-NEXT: v_pk_max_f16 v8, v13, v13
-; GFX900-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX900-NEXT: v_pk_min_f16 v5, v5, v8
-; GFX900-NEXT: v_pk_max_f16 v8, v14, v14
-; GFX900-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX900-NEXT: v_pk_min_f16 v6, v6, v8
-; GFX900-NEXT: v_pk_max_f16 v8, v15, v15
-; GFX900-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX900-NEXT: v_pk_min_f16 v7, v7, v8
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_v16f16:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_pk_max_f16 v8, v8, v8
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX950-NEXT: v_pk_min_f16 v0, v0, v8
-; GFX950-NEXT: v_pk_max_f16 v8, v9, v9
-; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX950-NEXT: v_pk_min_f16 v1, v1, v8
-; GFX950-NEXT: v_pk_max_f16 v8, v10, v10
-; GFX950-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX950-NEXT: v_pk_min_f16 v2, v2, v8
-; GFX950-NEXT: v_pk_max_f16 v8, v11, v11
-; GFX950-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX950-NEXT: v_pk_min_f16 v3, v3, v8
-; GFX950-NEXT: v_pk_max_f16 v8, v12, v12
-; GFX950-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX950-NEXT: v_pk_min_f16 v4, v4, v8
-; GFX950-NEXT: v_pk_max_f16 v8, v13, v13
-; GFX950-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX950-NEXT: v_pk_min_f16 v5, v5, v8
-; GFX950-NEXT: v_pk_max_f16 v8, v14, v14
-; GFX950-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX950-NEXT: v_pk_min_f16 v6, v6, v8
-; GFX950-NEXT: v_pk_max_f16 v8, v15, v15
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_pk_min_f16 v7, v7, v8
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_v16f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_max_f16 v8, v8, v8
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX10-NEXT: v_pk_max_f16 v9, v9, v9
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX10-NEXT: v_pk_max_f16 v10, v10, v10
-; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX10-NEXT: v_pk_min_f16 v0, v0, v8
-; GFX10-NEXT: v_pk_max_f16 v8, v11, v11
-; GFX10-NEXT: v_pk_min_f16 v1, v1, v9
-; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX10-NEXT: v_pk_min_f16 v2, v2, v10
-; GFX10-NEXT: v_pk_max_f16 v9, v12, v12
-; GFX10-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX10-NEXT: v_pk_max_f16 v10, v13, v13
-; GFX10-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX10-NEXT: v_pk_max_f16 v11, v14, v14
-; GFX10-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX10-NEXT: v_pk_max_f16 v12, v15, v15
-; GFX10-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX10-NEXT: v_pk_min_f16 v3, v3, v8
-; GFX10-NEXT: v_pk_min_f16 v4, v4, v9
-; GFX10-NEXT: v_pk_min_f16 v5, v5, v10
-; GFX10-NEXT: v_pk_min_f16 v6, v6, v11
-; GFX10-NEXT: v_pk_min_f16 v7, v7, v12
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_v16f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_max_f16 v8, v8, v8
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX11-NEXT: v_pk_max_f16 v9, v9, v9
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX11-NEXT: v_pk_max_f16 v10, v10, v10
-; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX11-NEXT: v_pk_min_f16 v0, v0, v8
-; GFX11-NEXT: v_pk_max_f16 v8, v11, v11
-; GFX11-NEXT: v_pk_min_f16 v1, v1, v9
-; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX11-NEXT: v_pk_min_f16 v2, v2, v10
-; GFX11-NEXT: v_pk_max_f16 v9, v12, v12
-; GFX11-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX11-NEXT: v_pk_max_f16 v10, v13, v13
-; GFX11-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX11-NEXT: v_pk_max_f16 v11, v14, v14
-; GFX11-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX11-NEXT: v_pk_max_f16 v12, v15, v15
-; GFX11-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX11-NEXT: v_pk_min_f16 v3, v3, v8
-; GFX11-NEXT: v_pk_min_f16 v4, v4, v9
-; GFX11-NEXT: v_pk_min_f16 v5, v5, v10
-; GFX11-NEXT: v_pk_min_f16 v6, v6, v11
-; GFX11-NEXT: v_pk_min_f16 v7, v7, v12
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_v16f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_pk_max_num_f16 v8, v8, v8
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
-; GFX12-NEXT: v_pk_max_num_f16 v9, v9, v9
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
-; GFX12-NEXT: v_pk_max_num_f16 v10, v10, v10
-; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
-; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v8
-; GFX12-NEXT: v_pk_max_num_f16 v8, v11, v11
-; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v9
-; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
-; GFX12-NEXT: v_pk_min_num_f16 v2, v2, v10
-; GFX12-NEXT: v_pk_max_num_f16 v9, v12, v12
-; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4
-; GFX12-NEXT: v_pk_max_num_f16 v10, v13, v13
-; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5
-; GFX12-NEXT: v_pk_max_num_f16 v11, v14, v14
-; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v6
-; GFX12-NEXT: v_pk_max_num_f16 v12, v15, v15
-; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v7
-; GFX12-NEXT: v_pk_min_num_f16 v3, v3, v8
-; GFX12-NEXT: v_pk_min_num_f16 v4, v4, v9
-; GFX12-NEXT: v_pk_min_num_f16 v5, v5, v10
-; GFX12-NEXT: v_pk_min_num_f16 v6, v6, v11
-; GFX12-NEXT: v_pk_min_num_f16 v7, v7, v12
-; GFX12-NEXT: s_setpc_b64 s[30:31]
- %result = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> %x, <16 x half> %y)
- ret <16 x half> %result
-}
-
-define <32 x half> @v_minimumnum_v32f16(<32 x half> %x, <32 x half> %y) {
-; GFX7-LABEL: v_minimumnum_v32f16:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
-; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
-; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
-; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
-; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
-; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
-; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
-; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
-; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12
-; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13
-; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14
-; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15
-; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12
-; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13
-; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14
-; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15
-; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16
-; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v17
-; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v18
-; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v19
-; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16
-; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17
-; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18
-; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19
-; GFX7-NEXT: v_cvt_f16_f32_e32 v20, v20
-; GFX7-NEXT: v_cvt_f16_f32_e32 v21, v21
-; GFX7-NEXT: v_cvt_f16_f32_e32 v22, v22
-; GFX7-NEXT: v_cvt_f16_f32_e32 v23, v23
-; GFX7-NEXT: v_cvt_f32_f16_e32 v20, v20
-; GFX7-NEXT: v_cvt_f32_f16_e32 v21, v21
-; GFX7-NEXT: v_cvt_f32_f16_e32 v22, v22
-; GFX7-NEXT: v_cvt_f32_f16_e32 v23, v23
-; GFX7-NEXT: v_cvt_f16_f32_e32 v24, v24
-; GFX7-NEXT: v_cvt_f16_f32_e32 v25, v25
-; GFX7-NEXT: v_cvt_f16_f32_e32 v26, v26
-; GFX7-NEXT: v_cvt_f16_f32_e32 v27, v27
-; GFX7-NEXT: v_cvt_f32_f16_e32 v24, v24
-; GFX7-NEXT: v_cvt_f32_f16_e32 v25, v25
-; GFX7-NEXT: v_cvt_f32_f16_e32 v26, v26
-; GFX7-NEXT: v_cvt_f32_f16_e32 v27, v27
-; GFX7-NEXT: v_cvt_f16_f32_e32 v28, v28
-; GFX7-NEXT: v_cvt_f16_f32_e32 v29, v29
-; GFX7-NEXT: v_cvt_f16_f32_e32 v30, v30
-; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:128
-; GFX7-NEXT: v_cvt_f32_f16_e32 v28, v28
-; GFX7-NEXT: v_cvt_f32_f16_e32 v29, v29
-; GFX7-NEXT: v_cvt_f32_f16_e32 v30, v30
-; GFX7-NEXT: s_waitcnt vmcnt(1)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8
-; GFX7-NEXT: s_waitcnt vmcnt(1)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v32, v32
-; GFX7-NEXT: v_cvt_f32_f16_e32 v32, v32
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v2, v2, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:16
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v3, v3, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v4, v4, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v5, v5, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v6, v6, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:32
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v7, v7, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v8, v8, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:40
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v9, v9, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v10, v10, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:48
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v11, v11, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v12, v12, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:56
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v13, v13, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v14, v14, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:64
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v15, v15, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v16, v16, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:72
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v17, v17, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v18, v18, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:80
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v19, v19, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v20, v20, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:88
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v21, v21, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v22, v22, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:96
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v23, v23, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v24, v24, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:104
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v25, v25, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v26, v26, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:112
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v27, v27, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v28, v28, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:120
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v29, v29, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:124
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v30, v30, v31
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31
-; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31
-; GFX7-NEXT: v_min_f32_e32 v31, v31, v32
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v32f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GFX8-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GFX8-NEXT: v_max_f16_sdwa v38, v27, v27 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v39, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v48, v26, v26 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v49, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v50, v25, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v51, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v40, v22, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v41, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v58, v17, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v59, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v17, v17, v17
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_sdwa v52, v24, v24 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v53, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v54, v23, v23 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v55, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v42, v21, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v43, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v44, v20, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v45, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v46, v19, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v47, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v56, v18, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v57, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v38, v39, v38 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_min_f16_sdwa v39, v49, v48 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_min_f16_sdwa v48, v51, v50 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_min_f16_sdwa v51, v41, v40 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_min_f16_sdwa v40, v59, v58 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_min_f16_e32 v1, v1, v17
-; GFX8-NEXT: v_min_f16_sdwa v49, v53, v52 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_min_f16_sdwa v50, v55, v54 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_min_f16_sdwa v52, v43, v42 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_min_f16_sdwa v53, v45, v44 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_min_f16_sdwa v54, v47, v46 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_min_f16_sdwa v55, v57, v56 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v40
-; GFX8-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX8-NEXT: v_max_f16_sdwa v32, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v33, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v34, v29, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v35, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v36, v28, v28 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v37, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v32, v33, v32 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v33, v16, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v34, v35, v34 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v35, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v36, v37, v36 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_sdwa v37, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v15, v15, v15
-; GFX8-NEXT: v_min_f16_sdwa v33, v35, v33 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_max_f16_e32 v30, v30, v30
-; GFX8-NEXT: v_max_f16_e32 v14, v14, v14
-; GFX8-NEXT: v_max_f16_e32 v29, v29, v29
-; GFX8-NEXT: v_max_f16_e32 v13, v13, v13
-; GFX8-NEXT: v_max_f16_e32 v28, v28, v28
-; GFX8-NEXT: v_max_f16_e32 v12, v12, v12
-; GFX8-NEXT: v_max_f16_e32 v27, v27, v27
-; GFX8-NEXT: v_max_f16_e32 v11, v11, v11
-; GFX8-NEXT: v_max_f16_e32 v26, v26, v26
-; GFX8-NEXT: v_max_f16_e32 v10, v10, v10
-; GFX8-NEXT: v_max_f16_e32 v25, v25, v25
-; GFX8-NEXT: v_max_f16_e32 v9, v9, v9
-; GFX8-NEXT: v_max_f16_e32 v24, v24, v24
-; GFX8-NEXT: v_max_f16_e32 v8, v8, v8
-; GFX8-NEXT: v_max_f16_e32 v23, v23, v23
-; GFX8-NEXT: v_max_f16_e32 v7, v7, v7
-; GFX8-NEXT: v_max_f16_e32 v22, v22, v22
-; GFX8-NEXT: v_max_f16_e32 v6, v6, v6
-; GFX8-NEXT: v_max_f16_e32 v21, v21, v21
-; GFX8-NEXT: v_max_f16_e32 v5, v5, v5
-; GFX8-NEXT: v_max_f16_e32 v20, v20, v20
-; GFX8-NEXT: v_max_f16_e32 v4, v4, v4
-; GFX8-NEXT: v_max_f16_e32 v19, v19, v19
-; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
-; GFX8-NEXT: v_max_f16_e32 v18, v18, v18
-; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
-; GFX8-NEXT: v_max_f16_e32 v16, v16, v16
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_min_f16_e32 v14, v14, v30
-; GFX8-NEXT: v_min_f16_e32 v13, v13, v29
-; GFX8-NEXT: v_min_f16_e32 v12, v12, v28
-; GFX8-NEXT: v_min_f16_e32 v11, v11, v27
-; GFX8-NEXT: v_min_f16_e32 v10, v10, v26
-; GFX8-NEXT: v_min_f16_e32 v9, v9, v25
-; GFX8-NEXT: v_min_f16_e32 v8, v8, v24
-; GFX8-NEXT: v_min_f16_e32 v7, v7, v23
-; GFX8-NEXT: v_min_f16_e32 v6, v6, v22
-; GFX8-NEXT: v_min_f16_e32 v5, v5, v21
-; GFX8-NEXT: v_min_f16_e32 v4, v4, v20
-; GFX8-NEXT: v_min_f16_e32 v3, v3, v19
-; GFX8-NEXT: v_min_f16_e32 v2, v2, v18
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v16
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v33
-; GFX8-NEXT: v_or_b32_e32 v2, v2, v55
-; GFX8-NEXT: v_or_b32_e32 v3, v3, v54
-; GFX8-NEXT: v_or_b32_e32 v4, v4, v53
-; GFX8-NEXT: v_or_b32_e32 v5, v5, v52
-; GFX8-NEXT: v_or_b32_e32 v6, v6, v51
-; GFX8-NEXT: v_or_b32_e32 v7, v7, v50
-; GFX8-NEXT: v_or_b32_e32 v8, v8, v49
-; GFX8-NEXT: v_or_b32_e32 v9, v9, v48
-; GFX8-NEXT: v_or_b32_e32 v10, v10, v39
-; GFX8-NEXT: v_or_b32_e32 v11, v11, v38
-; GFX8-NEXT: v_or_b32_e32 v12, v12, v36
-; GFX8-NEXT: v_or_b32_e32 v13, v13, v34
-; GFX8-NEXT: v_or_b32_e32 v14, v14, v32
-; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v35, v31, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v31, v31, v31
-; GFX8-NEXT: v_min_f16_sdwa v35, v37, v35 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_min_f16_e32 v15, v15, v31
-; GFX8-NEXT: v_or_b32_e32 v15, v15, v35
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_minimumnum_v32f16:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_pk_max_f16 v16, v16, v16
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX900-NEXT: v_pk_min_f16 v0, v0, v16
-; GFX900-NEXT: v_pk_max_f16 v16, v17, v17
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX900-NEXT: v_pk_min_f16 v1, v1, v16
-; GFX900-NEXT: v_pk_max_f16 v16, v18, v18
-; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX900-NEXT: v_pk_min_f16 v2, v2, v16
-; GFX900-NEXT: v_pk_max_f16 v16, v19, v19
-; GFX900-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX900-NEXT: v_pk_min_f16 v3, v3, v16
-; GFX900-NEXT: buffer_load_dword v16, off, s[0:3], s32
-; GFX900-NEXT: v_pk_max_f16 v17, v20, v20
-; GFX900-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX900-NEXT: v_pk_max_f16 v18, v21, v21
-; GFX900-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX900-NEXT: v_pk_max_f16 v19, v22, v22
-; GFX900-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX900-NEXT: v_pk_max_f16 v20, v23, v23
-; GFX900-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX900-NEXT: v_pk_max_f16 v21, v24, v24
-; GFX900-NEXT: v_pk_max_f16 v8, v8, v8
-; GFX900-NEXT: v_pk_max_f16 v22, v25, v25
-; GFX900-NEXT: v_pk_max_f16 v9, v9, v9
-; GFX900-NEXT: v_pk_max_f16 v23, v26, v26
-; GFX900-NEXT: v_pk_max_f16 v10, v10, v10
-; GFX900-NEXT: v_pk_max_f16 v24, v27, v27
-; GFX900-NEXT: v_pk_max_f16 v11, v11, v11
-; GFX900-NEXT: v_pk_max_f16 v25, v28, v28
-; GFX900-NEXT: v_pk_max_f16 v12, v12, v12
-; GFX900-NEXT: v_pk_max_f16 v26, v29, v29
-; GFX900-NEXT: v_pk_max_f16 v13, v13, v13
-; GFX900-NEXT: v_pk_max_f16 v27, v30, v30
-; GFX900-NEXT: v_pk_max_f16 v14, v14, v14
-; GFX900-NEXT: v_pk_max_f16 v15, v15, v15
-; GFX900-NEXT: v_pk_min_f16 v4, v4, v17
-; GFX900-NEXT: v_pk_min_f16 v5, v5, v18
-; GFX900-NEXT: v_pk_min_f16 v6, v6, v19
-; GFX900-NEXT: v_pk_min_f16 v7, v7, v20
-; GFX900-NEXT: v_pk_min_f16 v8, v8, v21
-; GFX900-NEXT: v_pk_min_f16 v9, v9, v22
-; GFX900-NEXT: v_pk_min_f16 v10, v10, v23
-; GFX900-NEXT: v_pk_min_f16 v11, v11, v24
-; GFX900-NEXT: v_pk_min_f16 v12, v12, v25
-; GFX900-NEXT: v_pk_min_f16 v13, v13, v26
-; GFX900-NEXT: v_pk_min_f16 v14, v14, v27
-; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: v_pk_max_f16 v16, v16, v16
-; GFX900-NEXT: v_pk_min_f16 v15, v15, v16
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_v32f16:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: scratch_load_dword v31, off, s32
-; GFX950-NEXT: v_pk_max_f16 v16, v16, v16
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX950-NEXT: v_pk_max_f16 v17, v17, v17
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX950-NEXT: v_pk_max_f16 v18, v18, v18
-; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX950-NEXT: v_pk_max_f16 v19, v19, v19
-; GFX950-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX950-NEXT: v_pk_max_f16 v20, v20, v20
-; GFX950-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX950-NEXT: v_pk_max_f16 v21, v21, v21
-; GFX950-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX950-NEXT: v_pk_max_f16 v22, v22, v22
-; GFX950-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX950-NEXT: v_pk_max_f16 v23, v23, v23
-; GFX950-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX950-NEXT: v_pk_max_f16 v24, v24, v24
-; GFX950-NEXT: v_pk_max_f16 v8, v8, v8
-; GFX950-NEXT: v_pk_max_f16 v25, v25, v25
-; GFX950-NEXT: v_pk_max_f16 v9, v9, v9
-; GFX950-NEXT: v_pk_max_f16 v26, v26, v26
-; GFX950-NEXT: v_pk_max_f16 v10, v10, v10
-; GFX950-NEXT: v_pk_max_f16 v27, v27, v27
-; GFX950-NEXT: v_pk_max_f16 v11, v11, v11
-; GFX950-NEXT: v_pk_max_f16 v28, v28, v28
-; GFX950-NEXT: v_pk_max_f16 v12, v12, v12
-; GFX950-NEXT: v_pk_max_f16 v29, v29, v29
-; GFX950-NEXT: v_pk_max_f16 v13, v13, v13
-; GFX950-NEXT: v_pk_max_f16 v30, v30, v30
-; GFX950-NEXT: v_pk_max_f16 v14, v14, v14
-; GFX950-NEXT: v_pk_max_f16 v15, v15, v15
-; GFX950-NEXT: v_pk_min_f16 v0, v0, v16
-; GFX950-NEXT: v_pk_min_f16 v1, v1, v17
-; GFX950-NEXT: v_pk_min_f16 v2, v2, v18
-; GFX950-NEXT: v_pk_min_f16 v3, v3, v19
-; GFX950-NEXT: v_pk_min_f16 v4, v4, v20
-; GFX950-NEXT: v_pk_min_f16 v5, v5, v21
-; GFX950-NEXT: v_pk_min_f16 v6, v6, v22
-; GFX950-NEXT: v_pk_min_f16 v7, v7, v23
-; GFX950-NEXT: v_pk_min_f16 v8, v8, v24
-; GFX950-NEXT: v_pk_min_f16 v9, v9, v25
-; GFX950-NEXT: v_pk_min_f16 v10, v10, v26
-; GFX950-NEXT: v_pk_min_f16 v11, v11, v27
-; GFX950-NEXT: v_pk_min_f16 v12, v12, v28
-; GFX950-NEXT: v_pk_min_f16 v13, v13, v29
-; GFX950-NEXT: v_pk_min_f16 v14, v14, v30
-; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_pk_max_f16 v16, v31, v31
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_pk_min_f16 v15, v15, v16
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_v32f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT: v_pk_max_f16 v16, v16, v16
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX10-NEXT: v_pk_max_f16 v17, v17, v17
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX10-NEXT: v_pk_max_f16 v18, v18, v18
-; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX10-NEXT: v_pk_max_f16 v19, v19, v19
-; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX10-NEXT: v_pk_max_f16 v20, v20, v20
-; GFX10-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX10-NEXT: v_pk_max_f16 v21, v21, v21
-; GFX10-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX10-NEXT: v_pk_max_f16 v22, v22, v22
-; GFX10-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX10-NEXT: v_pk_max_f16 v23, v23, v23
-; GFX10-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX10-NEXT: v_pk_max_f16 v24, v24, v24
-; GFX10-NEXT: v_pk_max_f16 v8, v8, v8
-; GFX10-NEXT: v_pk_max_f16 v25, v25, v25
-; GFX10-NEXT: v_pk_max_f16 v9, v9, v9
-; GFX10-NEXT: v_pk_max_f16 v26, v26, v26
-; GFX10-NEXT: v_pk_max_f16 v10, v10, v10
-; GFX10-NEXT: v_pk_max_f16 v27, v27, v27
-; GFX10-NEXT: v_pk_max_f16 v11, v11, v11
-; GFX10-NEXT: v_pk_max_f16 v28, v28, v28
-; GFX10-NEXT: v_pk_max_f16 v12, v12, v12
-; GFX10-NEXT: v_pk_max_f16 v29, v29, v29
-; GFX10-NEXT: v_pk_max_f16 v13, v13, v13
-; GFX10-NEXT: v_pk_max_f16 v30, v30, v30
-; GFX10-NEXT: v_pk_max_f16 v14, v14, v14
-; GFX10-NEXT: v_pk_max_f16 v15, v15, v15
-; GFX10-NEXT: v_pk_min_f16 v0, v0, v16
-; GFX10-NEXT: v_pk_min_f16 v1, v1, v17
-; GFX10-NEXT: v_pk_min_f16 v2, v2, v18
-; GFX10-NEXT: v_pk_min_f16 v3, v3, v19
-; GFX10-NEXT: v_pk_min_f16 v4, v4, v20
-; GFX10-NEXT: v_pk_min_f16 v5, v5, v21
-; GFX10-NEXT: v_pk_min_f16 v6, v6, v22
-; GFX10-NEXT: v_pk_min_f16 v7, v7, v23
-; GFX10-NEXT: v_pk_min_f16 v8, v8, v24
-; GFX10-NEXT: v_pk_min_f16 v9, v9, v25
-; GFX10-NEXT: v_pk_min_f16 v10, v10, v26
-; GFX10-NEXT: v_pk_min_f16 v11, v11, v27
-; GFX10-NEXT: v_pk_min_f16 v12, v12, v28
-; GFX10-NEXT: v_pk_min_f16 v13, v13, v29
-; GFX10-NEXT: v_pk_min_f16 v14, v14, v30
-; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_max_f16 v16, v31, v31
-; GFX10-NEXT: v_pk_min_f16 v15, v15, v16
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_v32f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: scratch_load_b32 v31, off, s32
-; GFX11-NEXT: v_pk_max_f16 v16, v16, v16
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX11-NEXT: v_pk_max_f16 v17, v17, v17
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX11-NEXT: v_pk_max_f16 v18, v18, v18
-; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX11-NEXT: v_pk_max_f16 v19, v19, v19
-; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX11-NEXT: v_pk_max_f16 v20, v20, v20
-; GFX11-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX11-NEXT: v_pk_max_f16 v21, v21, v21
-; GFX11-NEXT: v_pk_max_f16 v5, v5, v5
-; GFX11-NEXT: v_pk_max_f16 v22, v22, v22
-; GFX11-NEXT: v_pk_max_f16 v6, v6, v6
-; GFX11-NEXT: v_pk_max_f16 v23, v23, v23
-; GFX11-NEXT: v_pk_max_f16 v7, v7, v7
-; GFX11-NEXT: v_pk_max_f16 v24, v24, v24
-; GFX11-NEXT: v_pk_max_f16 v8, v8, v8
-; GFX11-NEXT: v_pk_max_f16 v25, v25, v25
-; GFX11-NEXT: v_pk_max_f16 v9, v9, v9
-; GFX11-NEXT: v_pk_max_f16 v26, v26, v26
-; GFX11-NEXT: v_pk_max_f16 v10, v10, v10
-; GFX11-NEXT: v_pk_max_f16 v27, v27, v27
-; GFX11-NEXT: v_pk_max_f16 v11, v11, v11
-; GFX11-NEXT: v_pk_max_f16 v28, v28, v28
-; GFX11-NEXT: v_pk_max_f16 v12, v12, v12
-; GFX11-NEXT: v_pk_max_f16 v29, v29, v29
-; GFX11-NEXT: v_pk_max_f16 v13, v13, v13
-; GFX11-NEXT: v_pk_max_f16 v30, v30, v30
-; GFX11-NEXT: v_pk_max_f16 v14, v14, v14
-; GFX11-NEXT: v_pk_max_f16 v15, v15, v15
-; GFX11-NEXT: v_pk_min_f16 v0, v0, v16
-; GFX11-NEXT: v_pk_min_f16 v1, v1, v17
-; GFX11-NEXT: v_pk_min_f16 v2, v2, v18
-; GFX11-NEXT: v_pk_min_f16 v3, v3, v19
-; GFX11-NEXT: v_pk_min_f16 v4, v4, v20
-; GFX11-NEXT: v_pk_min_f16 v5, v5, v21
-; GFX11-NEXT: v_pk_min_f16 v6, v6, v22
-; GFX11-NEXT: v_pk_min_f16 v7, v7, v23
-; GFX11-NEXT: v_pk_min_f16 v8, v8, v24
-; GFX11-NEXT: v_pk_min_f16 v9, v9, v25
-; GFX11-NEXT: v_pk_min_f16 v10, v10, v26
-; GFX11-NEXT: v_pk_min_f16 v11, v11, v27
-; GFX11-NEXT: v_pk_min_f16 v12, v12, v28
-; GFX11-NEXT: v_pk_min_f16 v13, v13, v29
-; GFX11-NEXT: v_pk_min_f16 v14, v14, v30
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_pk_max_f16 v16, v31, v31
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_pk_min_f16 v15, v15, v16
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_v32f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: scratch_load_b32 v31, off, s32
-; GFX12-NEXT: v_pk_max_num_f16 v16, v16, v16
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
-; GFX12-NEXT: v_pk_max_num_f16 v17, v17, v17
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
-; GFX12-NEXT: v_pk_max_num_f16 v18, v18, v18
-; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
-; GFX12-NEXT: v_pk_max_num_f16 v19, v19, v19
-; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
-; GFX12-NEXT: v_pk_max_num_f16 v20, v20, v20
-; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4
-; GFX12-NEXT: v_pk_max_num_f16 v21, v21, v21
-; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5
-; GFX12-NEXT: v_pk_max_num_f16 v22, v22, v22
-; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v6
-; GFX12-NEXT: v_pk_max_num_f16 v23, v23, v23
-; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v7
-; GFX12-NEXT: v_pk_max_num_f16 v24, v24, v24
-; GFX12-NEXT: v_pk_max_num_f16 v8, v8, v8
-; GFX12-NEXT: v_pk_max_num_f16 v25, v25, v25
-; GFX12-NEXT: v_pk_max_num_f16 v9, v9, v9
-; GFX12-NEXT: v_pk_max_num_f16 v26, v26, v26
-; GFX12-NEXT: v_pk_max_num_f16 v10, v10, v10
-; GFX12-NEXT: v_pk_max_num_f16 v27, v27, v27
-; GFX12-NEXT: v_pk_max_num_f16 v11, v11, v11
-; GFX12-NEXT: v_pk_max_num_f16 v28, v28, v28
-; GFX12-NEXT: v_pk_max_num_f16 v12, v12, v12
-; GFX12-NEXT: v_pk_max_num_f16 v29, v29, v29
-; GFX12-NEXT: v_pk_max_num_f16 v13, v13, v13
-; GFX12-NEXT: v_pk_max_num_f16 v30, v30, v30
-; GFX12-NEXT: v_pk_max_num_f16 v14, v14, v14
-; GFX12-NEXT: v_pk_max_num_f16 v15, v15, v15
-; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v16
-; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v17
-; GFX12-NEXT: v_pk_min_num_f16 v2, v2, v18
-; GFX12-NEXT: v_pk_min_num_f16 v3, v3, v19
-; GFX12-NEXT: v_pk_min_num_f16 v4, v4, v20
-; GFX12-NEXT: v_pk_min_num_f16 v5, v5, v21
-; GFX12-NEXT: v_pk_min_num_f16 v6, v6, v22
-; GFX12-NEXT: v_pk_min_num_f16 v7, v7, v23
-; GFX12-NEXT: v_pk_min_num_f16 v8, v8, v24
-; GFX12-NEXT: v_pk_min_num_f16 v9, v9, v25
-; GFX12-NEXT: v_pk_min_num_f16 v10, v10, v26
-; GFX12-NEXT: v_pk_min_num_f16 v11, v11, v27
-; GFX12-NEXT: v_pk_min_num_f16 v12, v12, v28
-; GFX12-NEXT: v_pk_min_num_f16 v13, v13, v29
-; GFX12-NEXT: v_pk_min_num_f16 v14, v14, v30
-; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: v_pk_max_num_f16 v16, v31, v31
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_pk_min_num_f16 v15, v15, v16
-; GFX12-NEXT: s_setpc_b64 s[30:31]
- %result = call <32 x half> @llvm.minimumnum.v32f16(<32 x half> %x, <32 x half> %y)
- ret <32 x half> %result
-}
-
-define <2 x float> @v_minimumnum_v2f32(<2 x float> %x, <2 x float> %y) {
-; GFX7-LABEL: v_minimumnum_v2f32:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v3
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v2
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v2f32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v3
-; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX8-NEXT: v_min_f32_e32 v1, v1, v2
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_v2f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_max_f32_e32 v2, v3, v3
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_min_f32_e32 v1, v1, v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_v2f32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX10-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX10-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_v2f32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
-; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_v2f32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
-; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+define <16 x half> @v_minimumnum_v16f16(<16 x half> %x, <16 x half> %y) {
+; GFX7-SDAG-LABEL: v_minimumnum_v16f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v16
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v16
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v17
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v20
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v16
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v18
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v21
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v16
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v19
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v22
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v20, v23
+; GFX7-SDAG-NEXT: v_min_f32_e32 v4, v4, v17
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19
+; GFX7-SDAG-NEXT: v_min_f32_e32 v5, v5, v18
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8
+; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v16
+; GFX7-SDAG-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX7-SDAG-NEXT: v_min_f32_e32 v6, v6, v19
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v24
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v25
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v26
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v20, v20
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19
+; GFX7-SDAG-NEXT: v_min_f32_e32 v7, v7, v20
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v20, v27
+; GFX7-SDAG-NEXT: v_min_f32_e32 v8, v8, v17
+; GFX7-SDAG-NEXT: v_min_f32_e32 v9, v9, v18
+; GFX7-SDAG-NEXT: v_min_f32_e32 v10, v10, v19
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v12, v12
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v28
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v13, v13
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v29
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v14, v14
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v30
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v15, v15
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v20, v20
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v12, v12
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v13, v13
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v14, v14
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v15, v15
+; GFX7-SDAG-NEXT: v_min_f32_e32 v11, v11, v20
+; GFX7-SDAG-NEXT: v_min_f32_e32 v12, v12, v17
+; GFX7-SDAG-NEXT: v_min_f32_e32 v13, v13, v18
+; GFX7-SDAG-NEXT: v_min_f32_e32 v14, v14, v19
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v16
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16
+; GFX7-SDAG-NEXT: v_min_f32_e32 v15, v15, v16
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v16f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v16
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v16
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v17
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v20
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v16
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v18
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v21
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-GISEL-NEXT: v_min_f32_e32 v4, v4, v17
+; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v16
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v19
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v22
+; GFX7-GISEL-NEXT: v_min_f32_e32 v5, v5, v18
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v16
+; GFX7-GISEL-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX7-GISEL-NEXT: v_min_f32_e32 v6, v6, v19
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v23
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v24
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v25
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v20, v26
+; GFX7-GISEL-NEXT: v_min_f32_e32 v7, v7, v17
+; GFX7-GISEL-NEXT: v_min_f32_e32 v8, v8, v18
+; GFX7-GISEL-NEXT: v_min_f32_e32 v9, v9, v19
+; GFX7-GISEL-NEXT: v_min_f32_e32 v10, v10, v20
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v27
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v12, v12
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v28
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v13, v13
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v29
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v14, v14
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v20, v30
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v15, v15
+; GFX7-GISEL-NEXT: v_min_f32_e32 v11, v11, v17
+; GFX7-GISEL-NEXT: v_min_f32_e32 v12, v12, v18
+; GFX7-GISEL-NEXT: v_min_f32_e32 v13, v13, v19
+; GFX7-GISEL-NEXT: v_min_f32_e32 v14, v14, v20
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v8, v8
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v9, v9
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v10, v10
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v11, v11
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v12, v12
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v13, v13
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v14, v14
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v16
+; GFX7-GISEL-NEXT: v_min_f32_e32 v15, v15, v16
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v15, v15
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v16f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v16, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v17, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v16, v17, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v17, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v18, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v17, v18, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v18, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v19, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v18, v19, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v19, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v20, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v19, v20, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v20, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v21, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v20, v21, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v21, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v22, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v21, v22, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v22, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v23, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v22, v23, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v23, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v24, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v15, v15, v15
+; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v7
+; GFX8-SDAG-NEXT: v_max_f16_e32 v14, v14, v14
+; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v6
+; GFX8-SDAG-NEXT: v_max_f16_e32 v13, v13, v13
+; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5
+; GFX8-SDAG-NEXT: v_max_f16_e32 v12, v12, v12
+; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4
+; GFX8-SDAG-NEXT: v_max_f16_e32 v11, v11, v11
+; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3
+; GFX8-SDAG-NEXT: v_max_f16_e32 v10, v10, v10
+; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2
+; GFX8-SDAG-NEXT: v_max_f16_e32 v9, v9, v9
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v8, v8, v8
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v23, v24, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_min_f16_e32 v7, v7, v15
+; GFX8-SDAG-NEXT: v_min_f16_e32 v6, v6, v14
+; GFX8-SDAG-NEXT: v_min_f16_e32 v5, v5, v13
+; GFX8-SDAG-NEXT: v_min_f16_e32 v4, v4, v12
+; GFX8-SDAG-NEXT: v_min_f16_e32 v3, v3, v11
+; GFX8-SDAG-NEXT: v_min_f16_e32 v2, v2, v10
+; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v9
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v8
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v23
+; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v22
+; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v21
+; GFX8-SDAG-NEXT: v_or_b32_e32 v3, v3, v20
+; GFX8-SDAG-NEXT: v_or_b32_e32 v4, v4, v19
+; GFX8-SDAG-NEXT: v_or_b32_e32 v5, v5, v18
+; GFX8-SDAG-NEXT: v_or_b32_e32 v6, v6, v17
+; GFX8-SDAG-NEXT: v_or_b32_e32 v7, v7, v16
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v16f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v16, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v8, v8
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v8, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v8, v9, v9
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v9, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v2, v2
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v10, v10
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v10, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v16, v16, v19
+; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v3, v3
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v2, v2, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v10, v11, v11
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v11, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v8, v17, v8
+; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v4, v4
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v3, v3, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v11, v12, v12
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v12, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v9, v18, v9
+; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v5, v5
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v4, v4, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v12, v13, v13
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v13, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v10, v19, v10
+; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v6, v6
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v11, v17, v11
+; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v7, v7
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v12, v18, v12
+; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v14, v14
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v14, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v5, v5, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v13, v15, v15
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v15, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v18, v19, v18
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v6, v6, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_min_f16_e32 v13, v17, v13
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v7, v7, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v16, v0
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v8, v1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v9, v2
+; GFX8-GISEL-NEXT: v_or_b32_e32 v3, v10, v3
+; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v11, v4
+; GFX8-GISEL-NEXT: v_or_b32_e32 v5, v12, v5
+; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v18, v6
+; GFX8-GISEL-NEXT: v_or_b32_e32 v7, v13, v7
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_v16f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v8
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v9, v9
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v8
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v10, v10
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-SDAG-NEXT: v_pk_min_f16 v2, v2, v8
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v11, v11
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX900-SDAG-NEXT: v_pk_min_f16 v3, v3, v8
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v12, v12
+; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX900-SDAG-NEXT: v_pk_min_f16 v4, v4, v8
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v13, v13
+; GFX900-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX900-SDAG-NEXT: v_pk_min_f16 v5, v5, v8
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v14, v14
+; GFX900-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX900-SDAG-NEXT: v_pk_min_f16 v6, v6, v8
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v15, v15
+; GFX900-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX900-SDAG-NEXT: v_pk_min_f16 v7, v7, v8
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_v16f16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX9-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX9-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX9-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX9-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX9-GISEL-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX9-GISEL-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX9-GISEL-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX9-GISEL-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX9-GISEL-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX9-GISEL-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX9-GISEL-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX9-GISEL-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX9-GISEL-NEXT: v_pk_min_f16 v0, v0, v8
+; GFX9-GISEL-NEXT: v_pk_min_f16 v1, v1, v9
+; GFX9-GISEL-NEXT: v_pk_min_f16 v2, v2, v10
+; GFX9-GISEL-NEXT: v_pk_min_f16 v3, v3, v11
+; GFX9-GISEL-NEXT: v_pk_min_f16 v4, v4, v12
+; GFX9-GISEL-NEXT: v_pk_min_f16 v5, v5, v13
+; GFX9-GISEL-NEXT: v_pk_min_f16 v6, v6, v14
+; GFX9-GISEL-NEXT: v_pk_min_f16 v7, v7, v15
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v16f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v8
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v9, v9
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v8
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v10, v10
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX950-SDAG-NEXT: v_pk_min_f16 v2, v2, v8
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v11, v11
+; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX950-SDAG-NEXT: v_pk_min_f16 v3, v3, v8
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v12, v12
+; GFX950-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX950-SDAG-NEXT: v_pk_min_f16 v4, v4, v8
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v13, v13
+; GFX950-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX950-SDAG-NEXT: v_pk_min_f16 v5, v5, v8
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v14, v14
+; GFX950-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX950-SDAG-NEXT: v_pk_min_f16 v6, v6, v8
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v15, v15
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: v_pk_min_f16 v7, v7, v8
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_v16f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-SDAG-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-SDAG-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v8
+; GFX10-SDAG-NEXT: v_pk_max_f16 v8, v11, v11
+; GFX10-SDAG-NEXT: v_pk_min_f16 v1, v1, v9
+; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-SDAG-NEXT: v_pk_min_f16 v2, v2, v10
+; GFX10-SDAG-NEXT: v_pk_max_f16 v9, v12, v12
+; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX10-SDAG-NEXT: v_pk_max_f16 v10, v13, v13
+; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX10-SDAG-NEXT: v_pk_max_f16 v11, v14, v14
+; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX10-SDAG-NEXT: v_pk_max_f16 v12, v15, v15
+; GFX10-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX10-SDAG-NEXT: v_pk_min_f16 v3, v3, v8
+; GFX10-SDAG-NEXT: v_pk_min_f16 v4, v4, v9
+; GFX10-SDAG-NEXT: v_pk_min_f16 v5, v5, v10
+; GFX10-SDAG-NEXT: v_pk_min_f16 v6, v6, v11
+; GFX10-SDAG-NEXT: v_pk_min_f16 v7, v7, v12
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_v16f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX10-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX10-GISEL-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX10-GISEL-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX10-GISEL-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX10-GISEL-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX10-GISEL-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX10-GISEL-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX10-GISEL-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX10-GISEL-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v8
+; GFX10-GISEL-NEXT: v_pk_min_f16 v1, v1, v9
+; GFX10-GISEL-NEXT: v_pk_min_f16 v2, v2, v10
+; GFX10-GISEL-NEXT: v_pk_min_f16 v3, v3, v11
+; GFX10-GISEL-NEXT: v_pk_min_f16 v4, v4, v12
+; GFX10-GISEL-NEXT: v_pk_min_f16 v5, v5, v13
+; GFX10-GISEL-NEXT: v_pk_min_f16 v6, v6, v14
+; GFX10-GISEL-NEXT: v_pk_min_f16 v7, v7, v15
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_v16f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-SDAG-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-SDAG-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v8
+; GFX11-SDAG-NEXT: v_pk_max_f16 v8, v11, v11
+; GFX11-SDAG-NEXT: v_pk_min_f16 v1, v1, v9
+; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-SDAG-NEXT: v_pk_min_f16 v2, v2, v10
+; GFX11-SDAG-NEXT: v_pk_max_f16 v9, v12, v12
+; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX11-SDAG-NEXT: v_pk_max_f16 v10, v13, v13
+; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX11-SDAG-NEXT: v_pk_max_f16 v11, v14, v14
+; GFX11-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX11-SDAG-NEXT: v_pk_max_f16 v12, v15, v15
+; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX11-SDAG-NEXT: v_pk_min_f16 v3, v3, v8
+; GFX11-SDAG-NEXT: v_pk_min_f16 v4, v4, v9
+; GFX11-SDAG-NEXT: v_pk_min_f16 v5, v5, v10
+; GFX11-SDAG-NEXT: v_pk_min_f16 v6, v6, v11
+; GFX11-SDAG-NEXT: v_pk_min_f16 v7, v7, v12
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_v16f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX11-GISEL-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX11-GISEL-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX11-GISEL-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX11-GISEL-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX11-GISEL-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX11-GISEL-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX11-GISEL-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX11-GISEL-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v8
+; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v9
+; GFX11-GISEL-NEXT: v_pk_min_f16 v2, v2, v10
+; GFX11-GISEL-NEXT: v_pk_min_f16 v3, v3, v11
+; GFX11-GISEL-NEXT: v_pk_min_f16 v4, v4, v12
+; GFX11-GISEL-NEXT: v_pk_min_f16 v5, v5, v13
+; GFX11-GISEL-NEXT: v_pk_min_f16 v6, v6, v14
+; GFX11-GISEL-NEXT: v_pk_min_f16 v7, v7, v15
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_v16f16:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v8
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v9
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v10
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v8
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v8, v11, v11
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v9
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v10
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v9, v12, v12
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v10, v13, v13
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v11, v14, v14
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v12, v15, v15
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v3, v3, v8
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v4, v4, v9
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v5, v5, v10
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v6, v6, v11
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v7, v7, v12
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_v16f16:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v8
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v9
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v10
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v11, v11, v11
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v12, v12, v12
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v13, v13, v13
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v14, v14, v14
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v15, v15, v15
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v8
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v9
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v10
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v3, v3, v11
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v4, v4, v12
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v5, v5, v13
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v6, v6, v14
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v7, v7, v15
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %result = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> %x, <16 x half> %y)
+ ret <16 x half> %result
+}
+
+define <32 x half> @v_minimumnum_v32f16(<32 x half> %x, <32 x half> %y) {
+; GFX7-SDAG-LABEL: v_minimumnum_v32f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v12, v12
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v13, v13
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v14, v14
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v15, v15
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v12, v12
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v13, v13
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v14, v14
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v15, v15
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v16
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v17
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v18
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v19
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v20, v20
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v21, v21
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v22, v22
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v23, v23
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v20, v20
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v21, v21
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v22, v22
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v23, v23
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v24, v24
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v25, v25
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v26, v26
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v27, v27
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v24, v24
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v25, v25
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v26, v26
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v27, v27
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v28, v28
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v29, v29
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v30, v30
+; GFX7-SDAG-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:128
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v28, v28
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v29, v29
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v30, v30
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v32, v32
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v32, v32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v4, v4, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v5, v5, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v6, v6, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v7, v7, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v8, v8, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:40
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v9, v9, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v10, v10, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v11, v11, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v12, v12, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:56
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v13, v13, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v14, v14, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v15, v15, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v16, v16, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:72
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v17, v17, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v18, v18, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:80
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v19, v19, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v20, v20, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:88
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v21, v21, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v22, v22, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:96
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v23, v23, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v24, v24, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:104
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v25, v25, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v26, v26, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:112
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v27, v27, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v28, v28, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:120
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v29, v29, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:124
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v30, v30, v31
+; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-SDAG-NEXT: v_min_f32_e32 v31, v31, v32
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v32f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v12, v12
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v13, v13
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v14, v14
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v15, v15
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v16
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v17
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v18
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v19
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v20, v20
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v21, v21
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v22, v22
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v23, v23
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v24, v24
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v25, v25
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v26, v26
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v27, v27
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v28, v28
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v29, v29
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v30, v30
+; GFX7-GISEL-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:128
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v32, v32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:16
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v4, v4, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v5, v5, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v6, v6, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:32
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v7, v7, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v8, v8, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:40
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v8, v8
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v9, v9, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v9, v9
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v10, v10, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:48
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v10, v10
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v11, v11, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v11, v11
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v12, v12, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:56
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v12, v12
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v13, v13, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v13, v13
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v14, v14, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:64
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v14, v14
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v15, v15, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v15, v15
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v16, v16, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:72
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v16, v16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v17, v17, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v17, v17
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v18, v18, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:80
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v18, v18
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v19, v19, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v19, v19
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v20, v20, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:88
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v20, v20
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v21, v21, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v21, v21
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v22, v22, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:96
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v22, v22
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v23, v23, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v23, v23
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v24, v24, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:104
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v24, v24
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v25, v25, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v25, v25
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v26, v26, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:112
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v26, v26
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v27, v27, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v27, v27
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v28, v28, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:120
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v28, v28
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v29, v29, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:124
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v29, v29
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v30, v30, v31
+; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v30, v30
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31
+; GFX7-GISEL-NEXT: v_min_f32_e32 v31, v31, v32
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v31, v31
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v32f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v38, v27, v27 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v39, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v48, v26, v26 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v49, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v50, v25, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v51, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v40, v22, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v41, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v58, v17, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v59, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v17, v17, v17
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v52, v24, v24 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v53, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v54, v23, v23 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v55, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v42, v21, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v43, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v44, v20, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v45, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v46, v19, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v47, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v56, v18, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v57, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v38, v39, v38 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v39, v49, v48 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v48, v51, v50 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v51, v41, v40 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v40, v59, v58 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v17
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v49, v53, v52 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v50, v55, v54 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v52, v43, v42 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v53, v45, v44 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v54, v47, v46 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v55, v57, v56 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v40
+; GFX8-SDAG-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX8-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v32, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v33, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v34, v29, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v35, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v36, v28, v28 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v37, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v32, v33, v32 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v33, v16, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v34, v35, v34 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v35, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v36, v37, v36 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v37, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v15, v15, v15
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v33, v35, v33 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_max_f16_e32 v30, v30, v30
+; GFX8-SDAG-NEXT: v_max_f16_e32 v14, v14, v14
+; GFX8-SDAG-NEXT: v_max_f16_e32 v29, v29, v29
+; GFX8-SDAG-NEXT: v_max_f16_e32 v13, v13, v13
+; GFX8-SDAG-NEXT: v_max_f16_e32 v28, v28, v28
+; GFX8-SDAG-NEXT: v_max_f16_e32 v12, v12, v12
+; GFX8-SDAG-NEXT: v_max_f16_e32 v27, v27, v27
+; GFX8-SDAG-NEXT: v_max_f16_e32 v11, v11, v11
+; GFX8-SDAG-NEXT: v_max_f16_e32 v26, v26, v26
+; GFX8-SDAG-NEXT: v_max_f16_e32 v10, v10, v10
+; GFX8-SDAG-NEXT: v_max_f16_e32 v25, v25, v25
+; GFX8-SDAG-NEXT: v_max_f16_e32 v9, v9, v9
+; GFX8-SDAG-NEXT: v_max_f16_e32 v24, v24, v24
+; GFX8-SDAG-NEXT: v_max_f16_e32 v8, v8, v8
+; GFX8-SDAG-NEXT: v_max_f16_e32 v23, v23, v23
+; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v7
+; GFX8-SDAG-NEXT: v_max_f16_e32 v22, v22, v22
+; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v6
+; GFX8-SDAG-NEXT: v_max_f16_e32 v21, v21, v21
+; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5
+; GFX8-SDAG-NEXT: v_max_f16_e32 v20, v20, v20
+; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4
+; GFX8-SDAG-NEXT: v_max_f16_e32 v19, v19, v19
+; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3
+; GFX8-SDAG-NEXT: v_max_f16_e32 v18, v18, v18
+; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2
+; GFX8-SDAG-NEXT: v_max_f16_e32 v16, v16, v16
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_min_f16_e32 v14, v14, v30
+; GFX8-SDAG-NEXT: v_min_f16_e32 v13, v13, v29
+; GFX8-SDAG-NEXT: v_min_f16_e32 v12, v12, v28
+; GFX8-SDAG-NEXT: v_min_f16_e32 v11, v11, v27
+; GFX8-SDAG-NEXT: v_min_f16_e32 v10, v10, v26
+; GFX8-SDAG-NEXT: v_min_f16_e32 v9, v9, v25
+; GFX8-SDAG-NEXT: v_min_f16_e32 v8, v8, v24
+; GFX8-SDAG-NEXT: v_min_f16_e32 v7, v7, v23
+; GFX8-SDAG-NEXT: v_min_f16_e32 v6, v6, v22
+; GFX8-SDAG-NEXT: v_min_f16_e32 v5, v5, v21
+; GFX8-SDAG-NEXT: v_min_f16_e32 v4, v4, v20
+; GFX8-SDAG-NEXT: v_min_f16_e32 v3, v3, v19
+; GFX8-SDAG-NEXT: v_min_f16_e32 v2, v2, v18
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v16
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v33
+; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v55
+; GFX8-SDAG-NEXT: v_or_b32_e32 v3, v3, v54
+; GFX8-SDAG-NEXT: v_or_b32_e32 v4, v4, v53
+; GFX8-SDAG-NEXT: v_or_b32_e32 v5, v5, v52
+; GFX8-SDAG-NEXT: v_or_b32_e32 v6, v6, v51
+; GFX8-SDAG-NEXT: v_or_b32_e32 v7, v7, v50
+; GFX8-SDAG-NEXT: v_or_b32_e32 v8, v8, v49
+; GFX8-SDAG-NEXT: v_or_b32_e32 v9, v9, v48
+; GFX8-SDAG-NEXT: v_or_b32_e32 v10, v10, v39
+; GFX8-SDAG-NEXT: v_or_b32_e32 v11, v11, v38
+; GFX8-SDAG-NEXT: v_or_b32_e32 v12, v12, v36
+; GFX8-SDAG-NEXT: v_or_b32_e32 v13, v13, v34
+; GFX8-SDAG-NEXT: v_or_b32_e32 v14, v14, v32
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v35, v31, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v31, v31, v31
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v35, v37, v35 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_min_f16_e32 v15, v15, v31
+; GFX8-SDAG-NEXT: v_or_b32_e32 v15, v15, v35
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v32f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v31, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v16, v16
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v16, v16, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v31, v31, v32
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v16, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v17, v17
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v17, v17, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v16, v16, v32
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v2, v2
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v18, v18
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v18, v18, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v17, v17, v32
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v2, v2, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v3, v3
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v19, v19
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v19, v19, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v18, v18, v32
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v3, v3, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v4, v4
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v20, v20
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v20, v20, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v19, v19, v32
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v4, v4, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v20, v5, v5
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v21, v21
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v21, v21, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v20, v20, v32
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v5, v5, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v21, v6, v6
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v22, v22
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v22, v22, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v21, v21, v32
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v6, v6, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v22, v7, v7
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v23, v23
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v23, v23, v23 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v22, v22, v32
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v7, v7, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v23, v8, v8
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v24, v24
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v8, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v24, v24, v24 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v23, v23, v32
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v8, v8, v24 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v24, v9, v9
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v25, v25
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v9, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v25, v25, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v24, v24, v32
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v9, v9, v25 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v25, v10, v10
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v26, v26
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v10, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v26, v26, v26 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v25, v25, v32
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v10, v10, v26 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v26, v11, v11
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v27, v27
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v11, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v27, v27, v27 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v26, v26, v32
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v11, v11, v27 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v27, v12, v12
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v28, v28
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v12, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v28, v28, v28 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v27, v27, v32
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v12, v12, v28 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v28, v13, v13
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v29, v29
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v13, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v29, v29, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v28, v28, v32
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v13, v13, v29 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_max_f16_e32 v29, v14, v14
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v30, v30
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v14, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v30, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v14, v14, v30 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: buffer_load_dword v30, off, s[0:3], s32
+; GFX8-GISEL-NEXT: v_min_f16_e32 v29, v29, v32
+; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v15, v15
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v15, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v31, v0
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v16, v1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v17, v2
+; GFX8-GISEL-NEXT: v_or_b32_e32 v3, v18, v3
+; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v19, v4
+; GFX8-GISEL-NEXT: v_or_b32_e32 v5, v20, v5
+; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v21, v6
+; GFX8-GISEL-NEXT: v_or_b32_e32 v7, v22, v7
+; GFX8-GISEL-NEXT: v_or_b32_e32 v8, v23, v8
+; GFX8-GISEL-NEXT: v_or_b32_e32 v9, v24, v9
+; GFX8-GISEL-NEXT: v_or_b32_e32 v10, v25, v10
+; GFX8-GISEL-NEXT: v_or_b32_e32 v11, v26, v11
+; GFX8-GISEL-NEXT: v_or_b32_e32 v12, v27, v12
+; GFX8-GISEL-NEXT: v_or_b32_e32 v13, v28, v13
+; GFX8-GISEL-NEXT: v_or_b32_e32 v14, v29, v14
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v33, v30, v30
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v30, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v32, v32, v33
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v15, v15, v30 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v15, v32, v15
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_v32f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v16
+; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v17, v17
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v16
+; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v18, v18
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-SDAG-NEXT: v_pk_min_f16 v2, v2, v16
+; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v19, v19
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX900-SDAG-NEXT: v_pk_min_f16 v3, v3, v16
+; GFX900-SDAG-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX900-SDAG-NEXT: v_pk_max_f16 v17, v20, v20
+; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX900-SDAG-NEXT: v_pk_max_f16 v18, v21, v21
+; GFX900-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX900-SDAG-NEXT: v_pk_max_f16 v19, v22, v22
+; GFX900-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX900-SDAG-NEXT: v_pk_max_f16 v20, v23, v23
+; GFX900-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX900-SDAG-NEXT: v_pk_max_f16 v21, v24, v24
+; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX900-SDAG-NEXT: v_pk_max_f16 v22, v25, v25
+; GFX900-SDAG-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX900-SDAG-NEXT: v_pk_max_f16 v23, v26, v26
+; GFX900-SDAG-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX900-SDAG-NEXT: v_pk_max_f16 v24, v27, v27
+; GFX900-SDAG-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX900-SDAG-NEXT: v_pk_max_f16 v25, v28, v28
+; GFX900-SDAG-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX900-SDAG-NEXT: v_pk_max_f16 v26, v29, v29
+; GFX900-SDAG-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX900-SDAG-NEXT: v_pk_max_f16 v27, v30, v30
+; GFX900-SDAG-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX900-SDAG-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX900-SDAG-NEXT: v_pk_min_f16 v4, v4, v17
+; GFX900-SDAG-NEXT: v_pk_min_f16 v5, v5, v18
+; GFX900-SDAG-NEXT: v_pk_min_f16 v6, v6, v19
+; GFX900-SDAG-NEXT: v_pk_min_f16 v7, v7, v20
+; GFX900-SDAG-NEXT: v_pk_min_f16 v8, v8, v21
+; GFX900-SDAG-NEXT: v_pk_min_f16 v9, v9, v22
+; GFX900-SDAG-NEXT: v_pk_min_f16 v10, v10, v23
+; GFX900-SDAG-NEXT: v_pk_min_f16 v11, v11, v24
+; GFX900-SDAG-NEXT: v_pk_min_f16 v12, v12, v25
+; GFX900-SDAG-NEXT: v_pk_min_f16 v13, v13, v26
+; GFX900-SDAG-NEXT: v_pk_min_f16 v14, v14, v27
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX900-SDAG-NEXT: v_pk_min_f16 v15, v15, v16
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v32f16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v16
+; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v17, v17
+; GFX900-GISEL-NEXT: v_pk_min_f16 v1, v1, v16
+; GFX900-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v18, v18
+; GFX900-GISEL-NEXT: v_pk_min_f16 v2, v2, v16
+; GFX900-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v19, v19
+; GFX900-GISEL-NEXT: v_pk_min_f16 v3, v3, v16
+; GFX900-GISEL-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX900-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX900-GISEL-NEXT: v_pk_max_f16 v17, v20, v20
+; GFX900-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX900-GISEL-NEXT: v_pk_max_f16 v18, v21, v21
+; GFX900-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX900-GISEL-NEXT: v_pk_max_f16 v19, v22, v22
+; GFX900-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX900-GISEL-NEXT: v_pk_max_f16 v20, v23, v23
+; GFX900-GISEL-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX900-GISEL-NEXT: v_pk_max_f16 v21, v24, v24
+; GFX900-GISEL-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX900-GISEL-NEXT: v_pk_max_f16 v22, v25, v25
+; GFX900-GISEL-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX900-GISEL-NEXT: v_pk_max_f16 v23, v26, v26
+; GFX900-GISEL-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX900-GISEL-NEXT: v_pk_max_f16 v24, v27, v27
+; GFX900-GISEL-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX900-GISEL-NEXT: v_pk_max_f16 v25, v28, v28
+; GFX900-GISEL-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX900-GISEL-NEXT: v_pk_max_f16 v26, v29, v29
+; GFX900-GISEL-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX900-GISEL-NEXT: v_pk_max_f16 v27, v30, v30
+; GFX900-GISEL-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX900-GISEL-NEXT: v_pk_min_f16 v4, v4, v17
+; GFX900-GISEL-NEXT: v_pk_min_f16 v5, v5, v18
+; GFX900-GISEL-NEXT: v_pk_min_f16 v6, v6, v19
+; GFX900-GISEL-NEXT: v_pk_min_f16 v7, v7, v20
+; GFX900-GISEL-NEXT: v_pk_min_f16 v8, v8, v21
+; GFX900-GISEL-NEXT: v_pk_min_f16 v9, v9, v22
+; GFX900-GISEL-NEXT: v_pk_min_f16 v10, v10, v23
+; GFX900-GISEL-NEXT: v_pk_min_f16 v11, v11, v24
+; GFX900-GISEL-NEXT: v_pk_min_f16 v12, v12, v25
+; GFX900-GISEL-NEXT: v_pk_min_f16 v13, v13, v26
+; GFX900-GISEL-NEXT: v_pk_min_f16 v14, v14, v27
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX900-GISEL-NEXT: v_pk_min_f16 v15, v15, v16
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v32f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: scratch_load_dword v31, off, s32
+; GFX950-SDAG-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v17, v17, v17
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_max_f16 v18, v18, v18
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-SDAG-NEXT: v_pk_max_f16 v19, v19, v19
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v20, v20, v20
+; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX950-SDAG-NEXT: v_pk_max_f16 v21, v21, v21
+; GFX950-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX950-SDAG-NEXT: v_pk_max_f16 v22, v22, v22
+; GFX950-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX950-SDAG-NEXT: v_pk_max_f16 v23, v23, v23
+; GFX950-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX950-SDAG-NEXT: v_pk_max_f16 v24, v24, v24
+; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX950-SDAG-NEXT: v_pk_max_f16 v25, v25, v25
+; GFX950-SDAG-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX950-SDAG-NEXT: v_pk_max_f16 v26, v26, v26
+; GFX950-SDAG-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX950-SDAG-NEXT: v_pk_max_f16 v27, v27, v27
+; GFX950-SDAG-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX950-SDAG-NEXT: v_pk_max_f16 v28, v28, v28
+; GFX950-SDAG-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX950-SDAG-NEXT: v_pk_max_f16 v29, v29, v29
+; GFX950-SDAG-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX950-SDAG-NEXT: v_pk_max_f16 v30, v30, v30
+; GFX950-SDAG-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX950-SDAG-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v16
+; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v17
+; GFX950-SDAG-NEXT: v_pk_min_f16 v2, v2, v18
+; GFX950-SDAG-NEXT: v_pk_min_f16 v3, v3, v19
+; GFX950-SDAG-NEXT: v_pk_min_f16 v4, v4, v20
+; GFX950-SDAG-NEXT: v_pk_min_f16 v5, v5, v21
+; GFX950-SDAG-NEXT: v_pk_min_f16 v6, v6, v22
+; GFX950-SDAG-NEXT: v_pk_min_f16 v7, v7, v23
+; GFX950-SDAG-NEXT: v_pk_min_f16 v8, v8, v24
+; GFX950-SDAG-NEXT: v_pk_min_f16 v9, v9, v25
+; GFX950-SDAG-NEXT: v_pk_min_f16 v10, v10, v26
+; GFX950-SDAG-NEXT: v_pk_min_f16 v11, v11, v27
+; GFX950-SDAG-NEXT: v_pk_min_f16 v12, v12, v28
+; GFX950-SDAG-NEXT: v_pk_min_f16 v13, v13, v29
+; GFX950-SDAG-NEXT: v_pk_min_f16 v14, v14, v30
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v16, v31, v31
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: v_pk_min_f16 v15, v15, v16
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v32f16:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: scratch_load_dword v31, off, s32
+; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-GISEL-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX950-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX950-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX950-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX950-GISEL-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX950-GISEL-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX950-GISEL-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX950-GISEL-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX950-GISEL-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX950-GISEL-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX950-GISEL-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX950-GISEL-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX950-GISEL-NEXT: v_pk_max_f16 v17, v17, v17
+; GFX950-GISEL-NEXT: v_pk_max_f16 v18, v18, v18
+; GFX950-GISEL-NEXT: v_pk_max_f16 v19, v19, v19
+; GFX950-GISEL-NEXT: v_pk_max_f16 v20, v20, v20
+; GFX950-GISEL-NEXT: v_pk_max_f16 v21, v21, v21
+; GFX950-GISEL-NEXT: v_pk_max_f16 v22, v22, v22
+; GFX950-GISEL-NEXT: v_pk_max_f16 v23, v23, v23
+; GFX950-GISEL-NEXT: v_pk_max_f16 v24, v24, v24
+; GFX950-GISEL-NEXT: v_pk_max_f16 v25, v25, v25
+; GFX950-GISEL-NEXT: v_pk_max_f16 v26, v26, v26
+; GFX950-GISEL-NEXT: v_pk_max_f16 v27, v27, v27
+; GFX950-GISEL-NEXT: v_pk_max_f16 v28, v28, v28
+; GFX950-GISEL-NEXT: v_pk_max_f16 v29, v29, v29
+; GFX950-GISEL-NEXT: v_pk_max_f16 v30, v30, v30
+; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v16
+; GFX950-GISEL-NEXT: v_pk_min_f16 v1, v1, v17
+; GFX950-GISEL-NEXT: v_pk_min_f16 v2, v2, v18
+; GFX950-GISEL-NEXT: v_pk_min_f16 v3, v3, v19
+; GFX950-GISEL-NEXT: v_pk_min_f16 v4, v4, v20
+; GFX950-GISEL-NEXT: v_pk_min_f16 v5, v5, v21
+; GFX950-GISEL-NEXT: v_pk_min_f16 v6, v6, v22
+; GFX950-GISEL-NEXT: v_pk_min_f16 v7, v7, v23
+; GFX950-GISEL-NEXT: v_pk_min_f16 v8, v8, v24
+; GFX950-GISEL-NEXT: v_pk_min_f16 v9, v9, v25
+; GFX950-GISEL-NEXT: v_pk_min_f16 v10, v10, v26
+; GFX950-GISEL-NEXT: v_pk_min_f16 v11, v11, v27
+; GFX950-GISEL-NEXT: v_pk_min_f16 v12, v12, v28
+; GFX950-GISEL-NEXT: v_pk_min_f16 v13, v13, v29
+; GFX950-GISEL-NEXT: v_pk_min_f16 v14, v14, v30
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-GISEL-NEXT: v_pk_max_f16 v16, v31, v31
+; GFX950-GISEL-NEXT: s_nop 0
+; GFX950-GISEL-NEXT: v_pk_min_f16 v15, v15, v16
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_v32f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX10-SDAG-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-SDAG-NEXT: v_pk_max_f16 v17, v17, v17
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-SDAG-NEXT: v_pk_max_f16 v18, v18, v18
+; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-SDAG-NEXT: v_pk_max_f16 v19, v19, v19
+; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-SDAG-NEXT: v_pk_max_f16 v20, v20, v20
+; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX10-SDAG-NEXT: v_pk_max_f16 v21, v21, v21
+; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX10-SDAG-NEXT: v_pk_max_f16 v22, v22, v22
+; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX10-SDAG-NEXT: v_pk_max_f16 v23, v23, v23
+; GFX10-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX10-SDAG-NEXT: v_pk_max_f16 v24, v24, v24
+; GFX10-SDAG-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX10-SDAG-NEXT: v_pk_max_f16 v25, v25, v25
+; GFX10-SDAG-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX10-SDAG-NEXT: v_pk_max_f16 v26, v26, v26
+; GFX10-SDAG-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX10-SDAG-NEXT: v_pk_max_f16 v27, v27, v27
+; GFX10-SDAG-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX10-SDAG-NEXT: v_pk_max_f16 v28, v28, v28
+; GFX10-SDAG-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX10-SDAG-NEXT: v_pk_max_f16 v29, v29, v29
+; GFX10-SDAG-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX10-SDAG-NEXT: v_pk_max_f16 v30, v30, v30
+; GFX10-SDAG-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX10-SDAG-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v16
+; GFX10-SDAG-NEXT: v_pk_min_f16 v1, v1, v17
+; GFX10-SDAG-NEXT: v_pk_min_f16 v2, v2, v18
+; GFX10-SDAG-NEXT: v_pk_min_f16 v3, v3, v19
+; GFX10-SDAG-NEXT: v_pk_min_f16 v4, v4, v20
+; GFX10-SDAG-NEXT: v_pk_min_f16 v5, v5, v21
+; GFX10-SDAG-NEXT: v_pk_min_f16 v6, v6, v22
+; GFX10-SDAG-NEXT: v_pk_min_f16 v7, v7, v23
+; GFX10-SDAG-NEXT: v_pk_min_f16 v8, v8, v24
+; GFX10-SDAG-NEXT: v_pk_min_f16 v9, v9, v25
+; GFX10-SDAG-NEXT: v_pk_min_f16 v10, v10, v26
+; GFX10-SDAG-NEXT: v_pk_min_f16 v11, v11, v27
+; GFX10-SDAG-NEXT: v_pk_min_f16 v12, v12, v28
+; GFX10-SDAG-NEXT: v_pk_min_f16 v13, v13, v29
+; GFX10-SDAG-NEXT: v_pk_min_f16 v14, v14, v30
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_max_f16 v16, v31, v31
+; GFX10-SDAG-NEXT: v_pk_min_f16 v15, v15, v16
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_v32f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-GISEL-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX10-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX10-GISEL-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX10-GISEL-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX10-GISEL-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX10-GISEL-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX10-GISEL-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX10-GISEL-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX10-GISEL-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX10-GISEL-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX10-GISEL-NEXT: v_pk_max_f16 v17, v17, v17
+; GFX10-GISEL-NEXT: v_pk_max_f16 v18, v18, v18
+; GFX10-GISEL-NEXT: v_pk_max_f16 v19, v19, v19
+; GFX10-GISEL-NEXT: v_pk_max_f16 v20, v20, v20
+; GFX10-GISEL-NEXT: v_pk_max_f16 v21, v21, v21
+; GFX10-GISEL-NEXT: v_pk_max_f16 v22, v22, v22
+; GFX10-GISEL-NEXT: v_pk_max_f16 v23, v23, v23
+; GFX10-GISEL-NEXT: v_pk_max_f16 v24, v24, v24
+; GFX10-GISEL-NEXT: v_pk_max_f16 v25, v25, v25
+; GFX10-GISEL-NEXT: v_pk_max_f16 v26, v26, v26
+; GFX10-GISEL-NEXT: v_pk_max_f16 v27, v27, v27
+; GFX10-GISEL-NEXT: v_pk_max_f16 v28, v28, v28
+; GFX10-GISEL-NEXT: v_pk_max_f16 v29, v29, v29
+; GFX10-GISEL-NEXT: v_pk_max_f16 v30, v30, v30
+; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v16
+; GFX10-GISEL-NEXT: v_pk_min_f16 v1, v1, v17
+; GFX10-GISEL-NEXT: v_pk_min_f16 v2, v2, v18
+; GFX10-GISEL-NEXT: v_pk_min_f16 v3, v3, v19
+; GFX10-GISEL-NEXT: v_pk_min_f16 v4, v4, v20
+; GFX10-GISEL-NEXT: v_pk_min_f16 v5, v5, v21
+; GFX10-GISEL-NEXT: v_pk_min_f16 v6, v6, v22
+; GFX10-GISEL-NEXT: v_pk_min_f16 v7, v7, v23
+; GFX10-GISEL-NEXT: v_pk_min_f16 v8, v8, v24
+; GFX10-GISEL-NEXT: v_pk_min_f16 v9, v9, v25
+; GFX10-GISEL-NEXT: v_pk_min_f16 v10, v10, v26
+; GFX10-GISEL-NEXT: v_pk_min_f16 v11, v11, v27
+; GFX10-GISEL-NEXT: v_pk_min_f16 v12, v12, v28
+; GFX10-GISEL-NEXT: v_pk_min_f16 v13, v13, v29
+; GFX10-GISEL-NEXT: v_pk_min_f16 v14, v14, v30
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_max_f16 v16, v31, v31
+; GFX10-GISEL-NEXT: v_pk_min_f16 v15, v15, v16
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_v32f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: scratch_load_b32 v31, off, s32
+; GFX11-SDAG-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-SDAG-NEXT: v_pk_max_f16 v17, v17, v17
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-SDAG-NEXT: v_pk_max_f16 v18, v18, v18
+; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-SDAG-NEXT: v_pk_max_f16 v19, v19, v19
+; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-SDAG-NEXT: v_pk_max_f16 v20, v20, v20
+; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX11-SDAG-NEXT: v_pk_max_f16 v21, v21, v21
+; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX11-SDAG-NEXT: v_pk_max_f16 v22, v22, v22
+; GFX11-SDAG-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX11-SDAG-NEXT: v_pk_max_f16 v23, v23, v23
+; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX11-SDAG-NEXT: v_pk_max_f16 v24, v24, v24
+; GFX11-SDAG-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX11-SDAG-NEXT: v_pk_max_f16 v25, v25, v25
+; GFX11-SDAG-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX11-SDAG-NEXT: v_pk_max_f16 v26, v26, v26
+; GFX11-SDAG-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX11-SDAG-NEXT: v_pk_max_f16 v27, v27, v27
+; GFX11-SDAG-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX11-SDAG-NEXT: v_pk_max_f16 v28, v28, v28
+; GFX11-SDAG-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX11-SDAG-NEXT: v_pk_max_f16 v29, v29, v29
+; GFX11-SDAG-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX11-SDAG-NEXT: v_pk_max_f16 v30, v30, v30
+; GFX11-SDAG-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX11-SDAG-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v16
+; GFX11-SDAG-NEXT: v_pk_min_f16 v1, v1, v17
+; GFX11-SDAG-NEXT: v_pk_min_f16 v2, v2, v18
+; GFX11-SDAG-NEXT: v_pk_min_f16 v3, v3, v19
+; GFX11-SDAG-NEXT: v_pk_min_f16 v4, v4, v20
+; GFX11-SDAG-NEXT: v_pk_min_f16 v5, v5, v21
+; GFX11-SDAG-NEXT: v_pk_min_f16 v6, v6, v22
+; GFX11-SDAG-NEXT: v_pk_min_f16 v7, v7, v23
+; GFX11-SDAG-NEXT: v_pk_min_f16 v8, v8, v24
+; GFX11-SDAG-NEXT: v_pk_min_f16 v9, v9, v25
+; GFX11-SDAG-NEXT: v_pk_min_f16 v10, v10, v26
+; GFX11-SDAG-NEXT: v_pk_min_f16 v11, v11, v27
+; GFX11-SDAG-NEXT: v_pk_min_f16 v12, v12, v28
+; GFX11-SDAG-NEXT: v_pk_min_f16 v13, v13, v29
+; GFX11-SDAG-NEXT: v_pk_min_f16 v14, v14, v30
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v16, v31, v31
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_pk_min_f16 v15, v15, v16
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_v32f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: scratch_load_b32 v31, off, s32
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-GISEL-NEXT: v_pk_max_f16 v16, v16, v16
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5
+; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v6
+; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v7
+; GFX11-GISEL-NEXT: v_pk_max_f16 v8, v8, v8
+; GFX11-GISEL-NEXT: v_pk_max_f16 v9, v9, v9
+; GFX11-GISEL-NEXT: v_pk_max_f16 v10, v10, v10
+; GFX11-GISEL-NEXT: v_pk_max_f16 v11, v11, v11
+; GFX11-GISEL-NEXT: v_pk_max_f16 v12, v12, v12
+; GFX11-GISEL-NEXT: v_pk_max_f16 v13, v13, v13
+; GFX11-GISEL-NEXT: v_pk_max_f16 v14, v14, v14
+; GFX11-GISEL-NEXT: v_pk_max_f16 v15, v15, v15
+; GFX11-GISEL-NEXT: v_pk_max_f16 v17, v17, v17
+; GFX11-GISEL-NEXT: v_pk_max_f16 v18, v18, v18
+; GFX11-GISEL-NEXT: v_pk_max_f16 v19, v19, v19
+; GFX11-GISEL-NEXT: v_pk_max_f16 v20, v20, v20
+; GFX11-GISEL-NEXT: v_pk_max_f16 v21, v21, v21
+; GFX11-GISEL-NEXT: v_pk_max_f16 v22, v22, v22
+; GFX11-GISEL-NEXT: v_pk_max_f16 v23, v23, v23
+; GFX11-GISEL-NEXT: v_pk_max_f16 v24, v24, v24
+; GFX11-GISEL-NEXT: v_pk_max_f16 v25, v25, v25
+; GFX11-GISEL-NEXT: v_pk_max_f16 v26, v26, v26
+; GFX11-GISEL-NEXT: v_pk_max_f16 v27, v27, v27
+; GFX11-GISEL-NEXT: v_pk_max_f16 v28, v28, v28
+; GFX11-GISEL-NEXT: v_pk_max_f16 v29, v29, v29
+; GFX11-GISEL-NEXT: v_pk_max_f16 v30, v30, v30
+; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v16
+; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v17
+; GFX11-GISEL-NEXT: v_pk_min_f16 v2, v2, v18
+; GFX11-GISEL-NEXT: v_pk_min_f16 v3, v3, v19
+; GFX11-GISEL-NEXT: v_pk_min_f16 v4, v4, v20
+; GFX11-GISEL-NEXT: v_pk_min_f16 v5, v5, v21
+; GFX11-GISEL-NEXT: v_pk_min_f16 v6, v6, v22
+; GFX11-GISEL-NEXT: v_pk_min_f16 v7, v7, v23
+; GFX11-GISEL-NEXT: v_pk_min_f16 v8, v8, v24
+; GFX11-GISEL-NEXT: v_pk_min_f16 v9, v9, v25
+; GFX11-GISEL-NEXT: v_pk_min_f16 v10, v10, v26
+; GFX11-GISEL-NEXT: v_pk_min_f16 v11, v11, v27
+; GFX11-GISEL-NEXT: v_pk_min_f16 v12, v12, v28
+; GFX11-GISEL-NEXT: v_pk_min_f16 v13, v13, v29
+; GFX11-GISEL-NEXT: v_pk_min_f16 v14, v14, v30
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v16, v31, v31
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_pk_min_f16 v15, v15, v16
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_v32f16:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: scratch_load_b32 v31, off, s32
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v16, v16, v16
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v17, v17, v17
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v18, v18, v18
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v19, v19, v19
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v20, v20, v20
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v21, v21, v21
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v22, v22, v22
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v23, v23, v23
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v24, v24, v24
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v8
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v25, v25, v25
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v9
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v26, v26, v26
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v10
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v27, v27, v27
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v11, v11, v11
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v28, v28, v28
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v12, v12, v12
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v29, v29, v29
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v13, v13, v13
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v30, v30, v30
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v14, v14, v14
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v15, v15, v15
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v16
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v17
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v18
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v3, v3, v19
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v4, v4, v20
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v5, v5, v21
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v6, v6, v22
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v7, v7, v23
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v8, v8, v24
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v9, v9, v25
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v10, v10, v26
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v11, v11, v27
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v12, v12, v28
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v13, v13, v29
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v14, v14, v30
+; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v16, v31, v31
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v15, v15, v16
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_v32f16:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: scratch_load_b32 v31, off, s32
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v16, v16, v16
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v8
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v9
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v10
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v11, v11, v11
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v12, v12, v12
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v13, v13, v13
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v14, v14, v14
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v15, v15, v15
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v17, v17, v17
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v18, v18, v18
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v19, v19, v19
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v20, v20, v20
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v21, v21, v21
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v22, v22, v22
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v23, v23, v23
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v24, v24, v24
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v25, v25, v25
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v26, v26, v26
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v27, v27, v27
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v28, v28, v28
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v29, v29, v29
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v30, v30, v30
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v16
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v17
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v18
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v3, v3, v19
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v4, v4, v20
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v5, v5, v21
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v6, v6, v22
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v7, v7, v23
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v8, v8, v24
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v9, v9, v25
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v10, v10, v26
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v11, v11, v27
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v12, v12, v28
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v13, v13, v29
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v14, v14, v30
+; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v16, v31, v31
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v15, v15, v16
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %result = call <32 x half> @llvm.minimumnum.v32f16(<32 x half> %x, <32 x half> %y)
+ ret <32 x half> %result
+}
+
+define <2 x float> @v_minimumnum_v2f32(<2 x float> %x, <2 x float> %y) {
+; GFX7-SDAG-LABEL: v_minimumnum_v2f32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v3
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v2
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v2f32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v2f32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v3
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-SDAG-NEXT: v_min_f32_e32 v1, v1, v2
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v2f32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX8-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_minimumnum_v2f32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v2
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v2f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX900-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v2f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX950-GISEL-NEXT: v_min_f32_e32 v1, v1, v2
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_v2f32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX10-SDAG-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_v2f32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX10-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_v2f32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
+; GFX11-SDAG-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_v2f32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
+; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_v2f32:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_v2f32:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> %x, <2 x float> %y)
ret <2 x float> %result
}
@@ -3794,87 +6591,190 @@ define <2 x float> @v_minimumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) {
}
define <3 x float> @v_minimumnum_v3f32(<3 x float> %x, <3 x float> %y) {
-; GFX7-LABEL: v_minimumnum_v3f32:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v4
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v5
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX7-NEXT: v_min_f32_e32 v2, v2, v3
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v3f32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v4
-; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX8-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v5
-; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX8-NEXT: v_min_f32_e32 v2, v2, v3
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_v3f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX9-NEXT: v_max_f32_e32 v3, v4, v4
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX9-NEXT: v_max_f32_e32 v3, v5, v5
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-NEXT: v_min_f32_e32 v2, v2, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_v3f32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f32_e32 v4, v4, v4
-; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX10-NEXT: v_max_f32_e32 v5, v5, v5
-; GFX10-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX10-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX10-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX10-NEXT: v_min_f32_e32 v2, v2, v5
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_v3f32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_max_f32 v3, v3, v3 :: v_dual_max_f32 v0, v0, v0
-; GFX11-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v1, v1, v1
-; GFX11-NEXT: v_dual_max_f32 v5, v5, v5 :: v_dual_max_f32 v2, v2, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_dual_min_f32 v0, v0, v3 :: v_dual_min_f32 v1, v1, v4
-; GFX11-NEXT: v_min_f32_e32 v2, v2, v5
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_v3f32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_dual_max_num_f32 v3, v3, v3 :: v_dual_max_num_f32 v0, v0, v0
-; GFX12-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v1, v1, v1
-; GFX12-NEXT: v_dual_max_num_f32 v5, v5, v5 :: v_dual_max_num_f32 v2, v2, v2
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4
-; GFX12-NEXT: v_min_num_f32_e32 v2, v2, v5
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_v3f32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v4
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v5
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v3
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v3f32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v4
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v5, 1.0, v5
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v3f32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v4
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-SDAG-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v5
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX8-SDAG-NEXT: v_min_f32_e32 v2, v2, v3
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v3f32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v4
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v5, 1.0, v5
+; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX8-GISEL-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX8-GISEL-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_minimumnum_v3f32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX9-SDAG-NEXT: v_min_f32_e32 v2, v2, v3
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v3f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v5, v5, v5
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX900-GISEL-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX900-GISEL-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v3f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_mov_b32_e32 v6, v3
+; GFX950-GISEL-NEXT: v_mov_b32_e32 v7, v4
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[6:7], 1.0, v[6:7] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v5
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v6, v6
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[4:5], 1.0, v[4:5] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v7, v7
+; GFX950-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX950-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_v3f32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-SDAG-NEXT: v_max_f32_e32 v5, v5, v5
+; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX10-SDAG-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX10-SDAG-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_v3f32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX10-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX10-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX10-GISEL-NEXT: v_max_f32_e32 v5, v5, v5
+; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX10-GISEL-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX10-GISEL-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_v3f32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_max_f32 v3, v3, v3 :: v_dual_max_f32 v0, v0, v0
+; GFX11-SDAG-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v1, v1, v1
+; GFX11-SDAG-NEXT: v_dual_max_f32 v5, v5, v5 :: v_dual_max_f32 v2, v2, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_dual_min_f32 v0, v0, v3 :: v_dual_min_f32 v1, v1, v4
+; GFX11-SDAG-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_v3f32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
+; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
+; GFX11-GISEL-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_dual_min_f32 v0, v0, v3 :: v_dual_min_f32 v1, v1, v4
+; GFX11-GISEL-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_v3f32:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v3, v3, v3 :: v_dual_max_num_f32 v0, v0, v0
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v1, v1, v1
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v5, v5, v5 :: v_dual_max_num_f32 v2, v2, v2
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4
+; GFX12-SDAG-NEXT: v_min_num_f32_e32 v2, v2, v5
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_v3f32:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4
+; GFX12-GISEL-NEXT: v_min_num_f32_e32 v2, v2, v5
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <3 x float> @llvm.minimumnum.v3f32(<3 x float> %x, <3 x float> %y)
ret <3 x float> %result
}
@@ -3934,101 +6834,218 @@ define <3 x float> @v_minimumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) {
}
define <4 x float> @v_minimumnum_v4f32(<4 x float> %x, <4 x float> %y) {
-; GFX7-LABEL: v_minimumnum_v4f32:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v5
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v6
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX7-NEXT: v_min_f32_e32 v2, v2, v4
-; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v7
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GFX7-NEXT: v_min_f32_e32 v3, v3, v4
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v4f32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v4
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v5
-; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX8-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v6
-; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX8-NEXT: v_min_f32_e32 v2, v2, v4
-; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v7
-; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GFX8-NEXT: v_min_f32_e32 v3, v3, v4
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_v4f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v4, v4, v4
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX9-NEXT: v_max_f32_e32 v4, v5, v5
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX9-NEXT: v_max_f32_e32 v4, v6, v6
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-NEXT: v_min_f32_e32 v2, v2, v4
-; GFX9-NEXT: v_max_f32_e32 v4, v7, v7
-; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-NEXT: v_min_f32_e32 v3, v3, v4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_v4f32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e32 v4, v4, v4
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f32_e32 v5, v5, v5
-; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX10-NEXT: v_max_f32_e32 v6, v6, v6
-; GFX10-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX10-NEXT: v_max_f32_e32 v7, v7, v7
-; GFX10-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX10-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX10-NEXT: v_min_f32_e32 v1, v1, v5
-; GFX10-NEXT: v_min_f32_e32 v2, v2, v6
-; GFX10-NEXT: v_min_f32_e32 v3, v3, v7
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_v4f32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
-; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
-; GFX11-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7
-; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5
-; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_v4f32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
-; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
-; GFX12-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
-; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5
-; GFX12-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_v4f32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v5
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v6
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v7
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v4
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v4f32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v4
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v5, 1.0, v5
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v6, 1.0, v6
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v7, 1.0, v7
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v4f32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v5
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-SDAG-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v6
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX8-SDAG-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v7
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX8-SDAG-NEXT: v_min_f32_e32 v3, v3, v4
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v4f32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v4
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v5, 1.0, v5
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v6, 1.0, v6
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v7, 1.0, v7
+; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX8-GISEL-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX8-GISEL-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX8-GISEL-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_minimumnum_v4f32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX9-SDAG-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX9-SDAG-NEXT: v_min_f32_e32 v3, v3, v4
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v4f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v5, v5, v5
+; GFX900-GISEL-NEXT: v_max_f32_e32 v6, v6, v6
+; GFX900-GISEL-NEXT: v_max_f32_e32 v7, v7, v7
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX900-GISEL-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX900-GISEL-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX900-GISEL-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v4f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[4:5], 1.0, v[4:5] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_pk_mul_f32 v[6:7], 1.0, v[6:7] op_sel_hi:[0,1]
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX950-GISEL-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX950-GISEL-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX950-GISEL-NEXT: v_min_f32_e32 v3, v3, v4
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_v4f32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f32_e32 v5, v5, v5
+; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-SDAG-NEXT: v_max_f32_e32 v6, v6, v6
+; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX10-SDAG-NEXT: v_max_f32_e32 v7, v7, v7
+; GFX10-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX10-SDAG-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX10-SDAG-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX10-SDAG-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_v4f32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX10-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX10-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX10-GISEL-NEXT: v_max_f32_e32 v5, v5, v5
+; GFX10-GISEL-NEXT: v_max_f32_e32 v6, v6, v6
+; GFX10-GISEL-NEXT: v_max_f32_e32 v7, v7, v7
+; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX10-GISEL-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX10-GISEL-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX10-GISEL-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_v4f32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
+; GFX11-SDAG-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
+; GFX11-SDAG-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7
+; GFX11-SDAG-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5
+; GFX11-SDAG-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_v4f32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
+; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
+; GFX11-GISEL-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
+; GFX11-GISEL-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5
+; GFX11-GISEL-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_v4f32:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-NEXT: v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5
+; GFX12-SDAG-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_v4f32:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5
+; GFX12-GISEL-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %result
}
@@ -4092,88 +7109,171 @@ define <4 x float> @v_minimumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) {
}
define <2 x double> @v_minimumnum_v2f64(<2 x double> %x, <2 x double> %y) {
-; GFX7-LABEL: v_minimumnum_v2f64:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v2f64:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_minimumnum_v2f64:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_v2f64:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX950-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7]
-; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_v2f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_v2f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_v2f64:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
-; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
-; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[4:5]
-; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[6:7]
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_v2f64:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v2f64:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v2f64:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX8-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v2f64:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX8-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_v2f64:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v2f64:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v2f64:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v2f64:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_v2f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_v2f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_v2f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_v2f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_v2f64:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[4:5]
+; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[6:7]
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_v2f64:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[4:5]
+; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[6:7]
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> %y)
ret <2 x double> %result
}
@@ -4229,109 +7329,213 @@ define <2 x double> @v_minimumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) {
}
define <3 x double> @v_minimumnum_v3f64(<3 x double> %x, <3 x double> %y) {
-; GFX7-LABEL: v_minimumnum_v3f64:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v3f64:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_minimumnum_v3f64:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX900-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX900-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_v3f64:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX950-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9]
-; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX950-NEXT: v_max_f64 v[6:7], v[10:11], v[10:11]
-; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7]
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_v3f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX10-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX10-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_v3f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX11-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX11-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_v3f64:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
-; GFX12-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9]
-; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
-; GFX12-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11]
-; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
-; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[6:7]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[8:9]
-; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[10:11]
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_v3f64:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX7-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v3f64:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX7-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v3f64:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX8-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX8-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX8-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v3f64:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX8-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX8-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX8-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_v3f64:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v3f64:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX900-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v3f64:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[10:11], v[10:11]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v3f64:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX950-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_v3f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX10-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX10-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_v3f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX10-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX10-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX10-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX10-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_v3f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_v3f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX11-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_v3f64:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
+; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[6:7]
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[8:9]
+; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[10:11]
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_v3f64:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11]
+; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[6:7]
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[8:9]
+; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[10:11]
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <3 x double> @llvm.minimumnum.v3f64(<3 x double> %x, <3 x double> %y)
ret <3 x double> %result
}
@@ -4344,179 +7548,304 @@ define <3 x double> @v_minimumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) {
; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v3f64_nnan:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_v3f64_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_v3f64_nnan:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_v3f64_nnan:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_v3f64_nnan:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[6:7]
-; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[8:9]
-; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[10:11]
-; GFX12-NEXT: s_setpc_b64 s[30:31]
- %result = call nnan <3 x double> @llvm.minimumnum.v3f64(<3 x double> %x, <3 x double> %y)
- ret <3 x double> %result
-}
-
-define <4 x double> @v_minimumnum_v4f64(<4 x double> %x, <4 x double> %y) {
-; GFX7-LABEL: v_minimumnum_v4f64:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX7-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX7-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
-; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
-; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
-; GFX7-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v4f64:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX8-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX8-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
-; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
-; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
-; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+;
+; GFX8-LABEL: v_minimumnum_v3f64_nnan:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_minimumnum_v4f64:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX900-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX900-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX900-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
-; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
-; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
-; GFX900-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_v4f64:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX950-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11]
-; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX950-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13]
-; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[8:9]
-; GFX950-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15]
-; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX950-NEXT: v_min_f64 v[6:7], v[6:7], v[8:9]
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_v4f64:
+; GFX9-LABEL: v_minimumnum_v3f64_nnan:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_minimumnum_v3f64_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX10-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX10-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX10-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
-; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
-; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
-; GFX10-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_minimumnum_v4f64:
+; GFX11-LABEL: v_minimumnum_v3f64_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX11-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX11-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX11-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
-; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
-; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
-; GFX12-LABEL: v_minimumnum_v4f64:
+; GFX12-LABEL: v_minimumnum_v3f64_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9]
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
-; GFX12-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11]
-; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
-; GFX12-NEXT: v_max_num_f64_e32 v[12:13], v[12:13], v[12:13]
-; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
-; GFX12-NEXT: v_max_num_f64_e32 v[14:15], v[14:15], v[14:15]
-; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
-; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[8:9]
-; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[10:11]
-; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[12:13]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX12-NEXT: v_min_num_f64_e32 v[6:7], v[6:7], v[14:15]
+; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[6:7]
+; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[8:9]
+; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[10:11]
; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = call nnan <3 x double> @llvm.minimumnum.v3f64(<3 x double> %x, <3 x double> %y)
+ ret <3 x double> %result
+}
+
+define <4 x double> @v_minimumnum_v4f64(<4 x double> %x, <4 x double> %y) {
+; GFX7-SDAG-LABEL: v_minimumnum_v4f64:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX7-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX7-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX7-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX7-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX7-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v4f64:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX7-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX7-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX7-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX7-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX7-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v4f64:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX8-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX8-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX8-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX8-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX8-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX8-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v4f64:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX8-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX8-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX8-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX8-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX8-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX8-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_v4f64:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX900-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX900-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX900-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX900-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v4f64:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX900-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX900-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX900-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX900-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX900-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v4f64:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[8:9]
+; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15]
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[8:9]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v4f64:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX950-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX950-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX950-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX950-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_v4f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX10-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX10-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX10-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX10-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX10-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_v4f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX10-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX10-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX10-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX10-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX10-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX10-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX10-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_v4f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX11-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX11-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX11-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX11-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_v4f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX11-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX11-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX11-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX11-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX11-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_v4f64:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[12:13], v[12:13], v[12:13]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[14:15], v[14:15], v[14:15]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
+; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[8:9]
+; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[10:11]
+; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[12:13]
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[6:7], v[6:7], v[14:15]
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_v4f64:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[12:13], v[12:13], v[12:13]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[14:15], v[14:15], v[14:15]
+; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[8:9]
+; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[10:11]
+; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[12:13]
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[6:7], v[6:7], v[14:15]
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <4 x double> @llvm.minimumnum.v4f64(<4 x double> %x, <4 x double> %y)
ret <4 x double> %result
}
@@ -4584,97 +7913,183 @@ define <4 x double> @v_minimumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) {
}
define half @v_minimumnum_f16_no_ieee(half %x, half %y) #0 {
-; GFX7-LABEL: v_minimumnum_f16_no_ieee:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f16_no_ieee:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_f16_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f16_no_ieee:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX10-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-TRUE16-LABEL: v_minimumnum_f16_no_ieee:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_minimumnum_f16_no_ieee:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-TRUE16-LABEL: v_minimumnum_f16_no_ieee:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
-; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-FAKE16-LABEL: v_minimumnum_f16_no_ieee:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
-; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f16_no_ieee:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f16_no_ieee:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f16_no_ieee:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f16_no_ieee:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_minimumnum_f16_no_ieee:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_f16_no_ieee:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f16_no_ieee:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f16_no_ieee:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_no_ieee:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_no_ieee:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_no_ieee:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_no_ieee:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_no_ieee:
+; GFX12-TRUE16-SDAG: ; %bb.0:
+; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_no_ieee:
+; GFX12-TRUE16-GISEL: ; %bb.0:
+; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_no_ieee:
+; GFX12-FAKE16-SDAG: ; %bb.0:
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_no_ieee:
+; GFX12-FAKE16-GISEL: ; %bb.0:
+; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.minimumnum.f16(half %x, half %y)
ret half %result
}
define half @v_minimumnum_f16_nan_no_ieee(half %x, half %y) #0 {
-; GFX7-LABEL: v_minimumnum_f16_nan_no_ieee:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f16_nan_no_ieee:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f16_nan_no_ieee:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_f16_nan_no_ieee:
; GFX8: ; %bb.0:
@@ -4730,57 +8145,109 @@ define half @v_minimumnum_f16_nan_no_ieee(half %x, half %y) #0 {
}
define float @v_minimumnum_f32_no_ieee(float %x, float %y) #0 {
-; GFX7-LABEL: v_minimumnum_f32_no_ieee:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f32_no_ieee:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_f32_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f32_no_ieee:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_f32_no_ieee:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_f32_no_ieee:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f32_no_ieee:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f32_no_ieee:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f32_no_ieee:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f32_no_ieee:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_minimumnum_f32_no_ieee:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_f32_no_ieee:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f32_no_ieee:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f32_no_ieee:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_f32_no_ieee:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_f32_no_ieee:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_f32_no_ieee:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_f32_no_ieee:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.minimumnum.f32(float %x, float %y)
ret float %result
}
@@ -4830,59 +8297,113 @@ define float @v_minimumnum_f32_nnan_no_ieee(float %x, float %y) #0 {
}
define double @v_minimumnum_f64_no_ieee(double %x, double %y) #0 {
-; GFX7-LABEL: v_minimumnum_f64_no_ieee:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_f64_no_ieee:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_minimumnum_f64_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_f64_no_ieee:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_f64_no_ieee:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_f64_no_ieee:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_f64_no_ieee:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_f64_no_ieee:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_f64_no_ieee:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_f64_no_ieee:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_minimumnum_f64_no_ieee:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX9-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_f64_no_ieee:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX9-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX9-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_f64_no_ieee:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_f64_no_ieee:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_f64_no_ieee:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_f64_no_ieee:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_f64_no_ieee:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_f64_no_ieee:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.minimumnum.f64(double %x, double %y)
ret double %result
}
@@ -4932,106 +8453,199 @@ define double @v_minimumnum_f64_nnan_no_ieee(double %x, double %y) #0 {
}
define <2 x half> @v_minimumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 {
-; GFX7-LABEL: v_minimumnum_v2f16_no_ieee:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v2f16_no_ieee:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: v_minimumnum_v2f16_no_ieee:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX900-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_minimumnum_v2f16_no_ieee:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX950-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_minimumnum_v2f16_no_ieee:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX10-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_minimumnum_v2f16_no_ieee:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimumnum_v2f16_no_ieee:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
-; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v3, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v2, v3
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-GISEL-NEXT: s_nop 0
+; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-SDAG-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v1
+; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GISEL-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v1
+; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> %x, <2 x half> %y)
ret <2 x half> %result
}
define <2 x half> @v_minimumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y) #0 {
-; GFX7-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v0, v1
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
; GFX9: ; %bb.0:
@@ -5065,34 +8679,60 @@ define <2 x half> @v_minimumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y)
}
define <3 x half> @v_minimumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y) #0 {
-; GFX7-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX7-NEXT: v_min_f32_e32 v2, v2, v5
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v2
-; GFX8-NEXT: v_min_f16_e32 v1, v1, v3
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v5
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v3
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v0, v2
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v1, v1, v3
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
; GFX9: ; %bb.0:
@@ -5130,41 +8770,73 @@ define <3 x half> @v_minimumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y)
}
define <4 x half> @v_minimumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y) #0 {
-; GFX7-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
-; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
-; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_min_f16_e32 v1, v1, v3
-; GFX8-NEXT: v_min_f16_e32 v0, v0, v2
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v6
+; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v7
+; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v5
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v3
+; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v5
+; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v4
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v0, v2
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v1, v3
+; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
; GFX9: ; %bb.0:
@@ -5202,3 +8874,6 @@ define <4 x half> @v_minimumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y)
}
attributes #0 = { "amdgpu-ieee"="false" }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX900: {{.*}}
+; GFX950: {{.*}}
More information about the llvm-commits
mailing list