[llvm] e57b327 - AMDGPU: Legalize fminimum and fmaximum f32 for gfx950 (#117634)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 26 11:44:15 PST 2024
Author: Matt Arsenault
Date: 2024-11-26T14:44:09-05:00
New Revision: e57b327be27bd185595a3383dfac90ec6651c123
URL: https://github.com/llvm/llvm-project/commit/e57b327be27bd185595a3383dfac90ec6651c123
DIFF: https://github.com/llvm/llvm-project/commit/e57b327be27bd185595a3383dfac90ec6651c123.diff
LOG: AMDGPU: Legalize fminimum and fmaximum f32 for gfx950 (#117634)
Select to minimum3/maximum3. Leave f16/v2f16 for later
since it's complicated by only having the vector version.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/VOP3Instructions.td
llvm/test/CodeGen/AMDGPU/fmaximum3.ll
llvm/test/CodeGen/AMDGPU/fminimum3.ll
llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 3f0845864336fe..2e0f95161935a9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -855,6 +855,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FMINIMUM, ISD::FMAXIMUM},
{MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
Custom);
+ } else {
+ // FIXME: For nnan fmaximum, emit the fmaximum3 instead of fmaxnum
+ if (Subtarget->hasMinimum3Maximum3F32())
+ setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
}
setOperationAction(ISD::INTRINSIC_WO_CHAIN,
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 5d4d56e8b0ad22..2b207e008581b3 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1234,6 +1234,23 @@ def : IntClampPat<V_MQSAD_PK_U16_U8_e64, int_amdgcn_mqsad_pk_u16_u8>;
def : IntClampPat<V_QSAD_PK_U16_U8_e64, int_amdgcn_qsad_pk_u16_u8>;
def : IntClampPat<V_MQSAD_U32_U8_e64, int_amdgcn_mqsad_u32_u8>;
+//===----------------------------------------------------------------------===//
+// Floating-point operation Patterns
+//===----------------------------------------------------------------------===//
+
+// Implement fminimum(x, y) by using minimum3(x, y, y)
+class MinimumMaximumByMinimum3Maximum3<SDPatternOperator node, ValueType vt,
+ Instruction inst> : GCNPat<
+ (vt (node (VOP3Mods vt:$src0, i32:$src0_mods), (VOP3Mods vt:$src1, i32:$src1_mods))),
+ (inst $src0_mods, $src0, $src1_mods, $src1, $src1_mods, $src1)
+>;
+
+// Prefer the real 2 operand form if legal
+let SubtargetPredicate = HasMinimum3Maximum3F32, AddedComplexity = -1000 in {
+def : MinimumMaximumByMinimum3Maximum3<fminimum, f32, V_MINIMUM3_F32_e64>;
+def : MinimumMaximumByMinimum3Maximum3<fmaximum, f32, V_MAXIMUM3_F32_e64>;
+}
+
//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index 08122cd0d89eab..209ae86b4dedce 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -14,19 +14,26 @@ define float @v_fmaximum3_f32(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v3, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v3, v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, v0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float %c)
ret float %max1
@@ -43,19 +50,26 @@ define float @v_fmaximum3_f32_commute(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v2, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_commute:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v3, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, v2, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_commute:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v3, v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, v2, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_commute:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_maximum3_f32 v0, v2, v0, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
%max1 = call float @llvm.maximum.f32(float %c, float %max0)
ret float %max1
@@ -70,21 +84,30 @@ define amdgpu_ps i32 @s_fmaximum3_f32(float inreg %a, float inreg %b, float inre
; GFX12-NEXT: v_readfirstlane_b32 s0, v0
; GFX12-NEXT: ; return to shader part epilog
;
-; GFX9-LABEL: s_fmaximum3_f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: v_mov_b32_e32 v0, s1
-; GFX9-NEXT: v_max_f32_e32 v1, s0, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, s2, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_readfirstlane_b32 s0, v0
-; GFX9-NEXT: ; return to shader part epilog
+; GFX940-LABEL: s_fmaximum3_f32:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: v_mov_b32_e32 v0, s1
+; GFX940-NEXT: v_max_f32_e32 v1, s0, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, s2, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_readfirstlane_b32 s0, v0
+; GFX940-NEXT: ; return to shader part epilog
+;
+; GFX950-LABEL: s_fmaximum3_f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: v_mov_b32_e32 v0, s0
+; GFX950-NEXT: v_maximum3_f32 v0, v0, s1, s1
+; GFX950-NEXT: v_maximum3_f32 v0, v0, s2, s2
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_readfirstlane_b32 s0, v0
+; GFX950-NEXT: ; return to shader part epilog
%max0 = call float @llvm.maximum.f32(float %a, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float %c)
%cast = bitcast float %max1 to i32
@@ -103,19 +126,26 @@ define float @v_fmaximum3_f32_fabs0(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, |v0|, v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_fabs0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v3, |v0|, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_fabs0:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e64 v3, |v0|, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, v0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_fabs0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, |v0|, v1, v1
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
%max0 = call float @llvm.maximum.f32(float %a.fabs, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float %c)
@@ -133,19 +163,26 @@ define float @v_fmaximum3_f32_fabs1(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, |v1|, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_fabs1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v3, v0, |v1|
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_fabs1:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e64 v3, v0, |v1|
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, v0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_fabs1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, |v1|, |v1|
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%b.fabs = call float @llvm.fabs.f32(float %b)
%max0 = call float @llvm.maximum.f32(float %a, float %b.fabs)
%max1 = call float @llvm.maximum.f32(float %max0, float %c)
@@ -163,19 +200,26 @@ define float @v_fmaximum3_f32_fabs2(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, |v2|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_fabs2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v3, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e64 v1, v0, |v2|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_fabs2:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v3, v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e64 v1, v0, |v2|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_fabs2:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_maximum3_f32 v0, v0, |v2|, |v2|
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%c.fabs = call float @llvm.fabs.f32(float %c)
%max0 = call float @llvm.maximum.f32(float %a, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float %c.fabs)
@@ -193,19 +237,26 @@ define float @v_fmaximum3_f32_fabs_all(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v2|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_fabs_all:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v3, |v0|, |v1|
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e64 v1, v0, |v2|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_fabs_all:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e64 v3, |v0|, |v1|
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e64 v1, v0, |v2|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_fabs_all:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v1|
+; GFX950-NEXT: v_maximum3_f32 v0, v0, |v2|, |v2|
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
%b.fabs = call float @llvm.fabs.f32(float %b)
%c.fabs = call float @llvm.fabs.f32(float %c)
@@ -225,19 +276,26 @@ define float @v_fmaximum3_f32_fneg_all(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, -v0, -v1, -v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_fneg_all:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v3, -v0, -v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e64 v1, v0, -v2
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_fneg_all:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e64 v3, -v0, -v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e64 v1, v0, -v2
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_fneg_all:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v1, -v1
+; GFX950-NEXT: v_maximum3_f32 v0, v0, -v2, -v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
%b.fneg = fneg float %b
%c.fneg = fneg float %c
@@ -257,19 +315,26 @@ define float @v_fmaximum3_f32_fneg_fabs_all(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v2|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_fneg_fabs_all:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v3, -|v0|, -|v1|
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e64 v1, v0, -|v2|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_fneg_fabs_all:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e64 v3, -|v0|, -|v1|
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e64 v1, v0, -|v2|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_fneg_fabs_all:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v1|
+; GFX950-NEXT: v_maximum3_f32 v0, v0, -|v2|, -|v2|
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
%b.fabs = call float @llvm.fabs.f32(float %b)
%c.fabs = call float @llvm.fabs.f32(float %c)
@@ -292,19 +357,26 @@ define float @v_fmaximum3_f32_fneg0(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, -v0, v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_fneg0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v3, -v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_fneg0:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e64 v3, -v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, v0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_fneg0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, -v0, v1, v1
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
%max0 = call float @llvm.maximum.f32(float %a.fneg, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float %c)
@@ -322,19 +394,26 @@ define float @v_fmaximum3_f32_fneg1(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, -v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_fneg1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v3, v0, -v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_fneg1:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e64 v3, v0, -v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, v0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_fneg1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, -v1, -v1
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%b.fneg = fneg float %b
%max0 = call float @llvm.maximum.f32(float %a, float %b.fneg)
%max1 = call float @llvm.maximum.f32(float %max0, float %c)
@@ -352,19 +431,26 @@ define float @v_fmaximum3_f32_fneg2(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, -v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_fneg2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v3, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e64 v1, v0, -v2
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_fneg2:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v3, v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e64 v1, v0, -v2
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_fneg2:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_maximum3_f32 v0, v0, -v2, -v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%c.fneg = fneg float %c
%max0 = call float @llvm.maximum.f32(float %a, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float %c.fneg)
@@ -382,19 +468,27 @@ define float @v_fmaximum3_f32_const0(float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, 0x41000000, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_const0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v2, 0x41000000, v0
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_const0:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v2, 0x41000000, v0
+; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_const0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: s_mov_b32 s0, 0x41000000
+; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, s0
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float 8.0, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float %c)
ret float %max1
@@ -411,19 +505,27 @@ define float @v_fmaximum3_f32__const2(float %a, float %b) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, 0x41000000
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32__const2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, 0x41000000, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32__const2:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, 0x41000000, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32__const2:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: s_mov_b32 s0, 0x41000000
+; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float 8.0)
ret float %max1
@@ -440,19 +542,26 @@ define float @v_fmaximum3_f32_inlineimm0(float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, 4.0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_inlineimm0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v0
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_inlineimm0:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v2, 4.0, v0
+; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_inlineimm0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, 4.0, 4.0
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float 4.0, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float %c)
ret float %max1
@@ -469,19 +578,26 @@ define float @v_fmaximum3_f32__inlineimm(float %a, float %b) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32__inlineimm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, 4.0, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32__inlineimm:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, 4.0, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32__inlineimm:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_maximum3_f32 v0, v0, 4.0, 4.0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float 4.0)
ret float %max1
@@ -500,19 +616,28 @@ define float @v_fmaximum3_f32_const1_const2(float %a) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, s0, 0x41800000
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_const1_const2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v1, 0x41000000, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, 0x41800000, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_const1_const2:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v1, 0x41000000, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, 0x41800000, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_const1_const2:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: s_mov_b32 s0, 0x41000000
+; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, s0
+; GFX950-NEXT: s_mov_b32 s0, 0x41800000
+; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float 8.0)
%max1 = call float @llvm.maximum.f32(float %max0, float 16.0)
ret float %max1
@@ -530,27 +655,36 @@ define <2 x float> @v_fmaximum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float
; GFX12-NEXT: v_maximum3_f32 v1, v5, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_v2f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v6, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_max_f32_e32 v3, v0, v2
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v2, v4, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_max_f32_e32 v2, v5, v1
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v2f32:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v6, v1, v3
+; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX940-NEXT: v_max_f32_e32 v3, v0, v2
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v2, v4, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v2, v5, v1
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v2f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_maximum3_f32 v0, v4, v0, v0
+; GFX950-NEXT: v_maximum3_f32 v1, v5, v1, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %c, <2 x float> %max0)
ret <2 x float> %max1
@@ -568,27 +702,36 @@ define <2 x float> @v_fmaximum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2
; GFX12-NEXT: v_maximum3_f32 v1, v1, v3, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_v2f32_commute:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v6, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_max_f32_e32 v3, v0, v2
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v2, v0, v4
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_max_f32_e32 v2, v1, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v2f32_commute:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v6, v1, v3
+; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX940-NEXT: v_max_f32_e32 v3, v0, v2
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v2, v0, v4
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v2, v1, v5
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v2f32_commute:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c)
ret <2 x float> %max1
@@ -606,27 +749,36 @@ define <2 x float> @v_fmaximum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b,
; GFX12-NEXT: v_maximum3_f32 v1, |v1|, |v3|, |v5|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_v2f32__fabs_all:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v6, |v1|, |v3|
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3|
-; GFX9-NEXT: v_max_f32_e64 v3, |v0|, |v2|
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX9-NEXT: v_max_f32_e64 v2, v0, |v4|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_max_f32_e64 v2, v1, |v5|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v2f32__fabs_all:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e64 v6, |v1|, |v3|
+; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3|
+; GFX940-NEXT: v_max_f32_e64 v3, |v0|, |v2|
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX940-NEXT: v_max_f32_e64 v2, v0, |v4|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT: v_max_f32_e64 v2, v1, |v5|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v2f32__fabs_all:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v1, |v1|, |v3|, |v3|
+; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v2|, |v2|
+; GFX950-NEXT: v_maximum3_f32 v0, v0, |v4|, |v4|
+; GFX950-NEXT: v_maximum3_f32 v1, v1, |v5|, |v5|
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
%b.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
%c.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %c)
@@ -647,27 +799,36 @@ define <2 x float> @v_fmaximum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b,
; GFX12-NEXT: v_maximum3_f32 v1, -v1, -v3, -v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_v2f32__fneg_all:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v6, -v1, -v3
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3
-; GFX9-NEXT: v_max_f32_e64 v3, -v0, -v2
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX9-NEXT: v_max_f32_e64 v2, v0, -v4
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_max_f32_e64 v2, v1, -v5
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v2f32__fneg_all:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e64 v6, -v1, -v3
+; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3
+; GFX940-NEXT: v_max_f32_e64 v3, -v0, -v2
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX940-NEXT: v_max_f32_e64 v2, v0, -v4
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT: v_max_f32_e64 v2, v1, -v5
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v2f32__fneg_all:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v1, -v1, -v3, -v3
+; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v2, -v2
+; GFX950-NEXT: v_maximum3_f32 v0, v0, -v4, -v4
+; GFX950-NEXT: v_maximum3_f32 v1, v1, -v5, -v5
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <2 x float> %a
%b.fneg = fneg <2 x float> %b
%c.fneg = fneg <2 x float> %c
@@ -688,27 +849,36 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c
; GFX12-NEXT: v_maximum3_f32 v1, v1, 2.0, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_v2f32__inlineimm1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v4, 2.0, v1
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_max_f32_e32 v4, 2.0, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_max_f32_e32 v4, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_max_f32_e32 v2, v1, v3
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v2f32__inlineimm1:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v4, 2.0, v1
+; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX940-NEXT: v_max_f32_e32 v4, 2.0, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX940-NEXT: v_max_f32_e32 v4, v0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: v_max_f32_e32 v2, v1, v3
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v2f32__inlineimm1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v1, v1, 2.0, 2.0
+; GFX950-NEXT: v_maximum3_f32 v0, v0, 2.0, 2.0
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> <float 2.0, float 2.0>)
%max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c)
ret <2 x float> %max1
@@ -726,27 +896,36 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b
; GFX12-NEXT: v_maximum3_f32 v1, v1, v3, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_v2f32__inlineimm2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v4, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_max_f32_e32 v3, v0, v2
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v1
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v2f32__inlineimm2:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v4, v1, v3
+; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX940-NEXT: v_max_f32_e32 v3, v0, v2
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v2, 4.0, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v2, 4.0, v1
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v2f32__inlineimm2:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_maximum3_f32 v0, v0, 4.0, 4.0
+; GFX950-NEXT: v_maximum3_f32 v1, v1, 4.0, 4.0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> <float 4.0, float 4.0>)
ret <2 x float> %max1
@@ -765,35 +944,46 @@ define <3 x float> @v_fmaximum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float
; GFX12-NEXT: v_maximum3_f32 v2, v8, v2, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_v3f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v9, v2, v5
-; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_max_f32_e32 v5, v1, v4
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_max_f32_e32 v4, v0, v3
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, v6, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, v7, v1
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, v8, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v3f32:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v9, v2, v5
+; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX940-NEXT: v_max_f32_e32 v5, v1, v4
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX940-NEXT: v_max_f32_e32 v4, v0, v3
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX940-NEXT: v_max_f32_e32 v3, v6, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v3, v7, v1
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v3, v8, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v8, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v3f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3
+; GFX950-NEXT: v_maximum3_f32 v0, v6, v0, v0
+; GFX950-NEXT: v_maximum3_f32 v1, v7, v1, v1
+; GFX950-NEXT: v_maximum3_f32 v2, v8, v2, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %c, <3 x float> %max0)
ret <3 x float> %max1
@@ -812,35 +1002,46 @@ define <3 x float> @v_fmaximum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3
; GFX12-NEXT: v_maximum3_f32 v2, v2, v5, v8
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_v3f32_commute:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v9, v2, v5
-; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_max_f32_e32 v5, v1, v4
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_max_f32_e32 v4, v0, v3
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, v0, v6
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v6
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, v1, v7
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v7
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, v2, v8
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v8
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v3f32_commute:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v9, v2, v5
+; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX940-NEXT: v_max_f32_e32 v5, v1, v4
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX940-NEXT: v_max_f32_e32 v4, v0, v3
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX940-NEXT: v_max_f32_e32 v3, v0, v6
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v6
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v3, v1, v7
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v7
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v3, v2, v8
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v8
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v3f32_commute:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v6, v6
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v7, v7
+; GFX950-NEXT: v_maximum3_f32 v2, v2, v8, v8
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c)
ret <3 x float> %max1
@@ -859,35 +1060,46 @@ define <3 x float> @v_fmaximum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b,
; GFX12-NEXT: v_maximum3_f32 v2, |v2|, |v5|, |v8|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_v3f32__fabs_all:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v9, |v2|, |v5|
-; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5|
-; GFX9-NEXT: v_max_f32_e64 v5, |v1|, |v4|
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4|
-; GFX9-NEXT: v_max_f32_e64 v4, |v0|, |v3|
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX9-NEXT: v_max_f32_e64 v3, v0, |v6|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX9-NEXT: v_max_f32_e64 v3, v1, |v7|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX9-NEXT: v_max_f32_e64 v3, v2, |v8|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v3f32__fabs_all:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e64 v9, |v2|, |v5|
+; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5|
+; GFX940-NEXT: v_max_f32_e64 v5, |v1|, |v4|
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4|
+; GFX940-NEXT: v_max_f32_e64 v4, |v0|, |v3|
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX940-NEXT: v_max_f32_e64 v3, v0, |v6|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX940-NEXT: v_max_f32_e64 v3, v1, |v7|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX940-NEXT: v_max_f32_e64 v3, v2, |v8|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v3f32__fabs_all:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v2, |v2|, |v5|, |v5|
+; GFX950-NEXT: v_maximum3_f32 v1, |v1|, |v4|, |v4|
+; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v3|, |v3|
+; GFX950-NEXT: v_maximum3_f32 v0, v0, |v6|, |v6|
+; GFX950-NEXT: v_maximum3_f32 v1, v1, |v7|, |v7|
+; GFX950-NEXT: v_maximum3_f32 v2, v2, |v8|, |v8|
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a)
%b.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %b)
%c.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %c)
@@ -909,35 +1121,46 @@ define <3 x float> @v_fmaximum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b,
; GFX12-NEXT: v_maximum3_f32 v2, -v2, -v5, -v8
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_v3f32__fneg_all:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v9, -v2, -v5
-; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5
-; GFX9-NEXT: v_max_f32_e64 v5, -v1, -v4
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4
-; GFX9-NEXT: v_max_f32_e64 v4, -v0, -v3
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX9-NEXT: v_max_f32_e64 v3, v0, -v6
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX9-NEXT: v_max_f32_e64 v3, v1, -v7
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX9-NEXT: v_max_f32_e64 v3, v2, -v8
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v3f32__fneg_all:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e64 v9, -v2, -v5
+; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5
+; GFX940-NEXT: v_max_f32_e64 v5, -v1, -v4
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4
+; GFX940-NEXT: v_max_f32_e64 v4, -v0, -v3
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX940-NEXT: v_max_f32_e64 v3, v0, -v6
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX940-NEXT: v_max_f32_e64 v3, v1, -v7
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX940-NEXT: v_max_f32_e64 v3, v2, -v8
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v3f32__fneg_all:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v2, -v2, -v5, -v5
+; GFX950-NEXT: v_maximum3_f32 v1, -v1, -v4, -v4
+; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v3, -v3
+; GFX950-NEXT: v_maximum3_f32 v0, v0, -v6, -v6
+; GFX950-NEXT: v_maximum3_f32 v1, v1, -v7, -v7
+; GFX950-NEXT: v_maximum3_f32 v2, v2, -v8, -v8
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <3 x float> %a
%b.fneg = fneg <3 x float> %b
%c.fneg = fneg <3 x float> %c
@@ -959,35 +1182,46 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c
; GFX12-NEXT: v_maximum3_f32 v2, v2, 2.0, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_v3f32__inlineimm1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v2
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
-; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v1
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_max_f32_e32 v6, v0, v3
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_max_f32_e32 v3, v1, v4
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, v2, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v3f32__inlineimm1:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v6, 2.0, v2
+; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
+; GFX940-NEXT: v_max_f32_e32 v6, 2.0, v1
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX940-NEXT: v_max_f32_e32 v6, 2.0, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX940-NEXT: v_max_f32_e32 v6, v0, v3
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX940-NEXT: v_max_f32_e32 v3, v1, v4
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v3, v2, v5
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v3f32__inlineimm1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v2, v2, 2.0, 2.0
+; GFX950-NEXT: v_maximum3_f32 v1, v1, 2.0, 2.0
+; GFX950-NEXT: v_maximum3_f32 v0, v0, 2.0, 2.0
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4
+; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> <float 2.0, float 2.0, float 2.0>)
%max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c)
ret <3 x float> %max1
@@ -1006,35 +1240,46 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b
; GFX12-NEXT: v_maximum3_f32 v2, v2, v5, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_v3f32__inlineimm2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v6, v2, v5
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_max_f32_e32 v5, v1, v4
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_max_f32_e32 v4, v0, v3
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v1
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v3f32__inlineimm2:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v6, v2, v5
+; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX940-NEXT: v_max_f32_e32 v5, v1, v4
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX940-NEXT: v_max_f32_e32 v4, v0, v3
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc
+; GFX940-NEXT: v_max_f32_e32 v3, 4.0, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v3, 4.0, v1
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v3, 4.0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v3f32__inlineimm2:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3
+; GFX950-NEXT: v_maximum3_f32 v0, v0, 4.0, 4.0
+; GFX950-NEXT: v_maximum3_f32 v1, v1, 4.0, 4.0
+; GFX950-NEXT: v_maximum3_f32 v2, v2, 4.0, 4.0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> <float 4.0, float 4.0, float 4.0>)
ret <3 x float> %max1
@@ -3165,19 +3410,26 @@ define <2 x float> @v_no_fmaximum3_f32__multi_use(float %a, float %b, float %c)
; GFX12-NEXT: v_maximum_f32 v1, v0, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_no_fmaximum3_f32__multi_use:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v3, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_no_fmaximum3_f32__multi_use:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v3, v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, v0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_no_fmaximum3_f32__multi_use:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_maximum3_f32 v1, v0, v2, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float %c)
%insert.0 = insertelement <2 x float> poison, float %max0, i32 0
@@ -3193,22 +3445,31 @@ define amdgpu_ps <2 x i32> @s_no_fmaximum3_f32__multi_use(float inreg %a, float
; GFX12-NEXT: s_maximum_f32 s1, s0, s2
; GFX12-NEXT: ; return to shader part epilog
;
-; GFX9-LABEL: s_no_fmaximum3_f32__multi_use:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: v_mov_b32_e32 v0, s1
-; GFX9-NEXT: v_max_f32_e32 v1, s0, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, s2, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
-; GFX9-NEXT: v_readfirstlane_b32 s0, v0
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_readfirstlane_b32 s1, v1
-; GFX9-NEXT: ; return to shader part epilog
+; GFX940-LABEL: s_no_fmaximum3_f32__multi_use:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: v_mov_b32_e32 v0, s1
+; GFX940-NEXT: v_max_f32_e32 v1, s0, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, s2, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
+; GFX940-NEXT: v_readfirstlane_b32 s0, v0
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_readfirstlane_b32 s1, v1
+; GFX940-NEXT: ; return to shader part epilog
+;
+; GFX950-LABEL: s_no_fmaximum3_f32__multi_use:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: v_mov_b32_e32 v0, s0
+; GFX950-NEXT: v_maximum3_f32 v0, v0, s1, s1
+; GFX950-NEXT: v_maximum3_f32 v1, v0, s2, s2
+; GFX950-NEXT: v_readfirstlane_b32 s0, v0
+; GFX950-NEXT: v_readfirstlane_b32 s1, v1
+; GFX950-NEXT: ; return to shader part epilog
%max0 = call float @llvm.maximum.f32(float %a, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float %c)
%cast0 = bitcast float %max0 to i32
@@ -3372,6 +3633,3 @@ define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double
%insert.1 = insertelement <2 x double> %insert.0, double %max1, i32 1
ret <2 x double> %insert.1
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX940: {{.*}}
-; GFX950: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
index 43293512c8c21d..000f6c190b9773 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
@@ -14,19 +14,26 @@ define float @v_fminimum3_f32(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v3, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v1, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_f32:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v3, v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v1, v0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
%max1 = call float @llvm.minimum.f32(float %max0, float %c)
ret float %max1
@@ -43,19 +50,26 @@ define float @v_fminimum3_f32_commute(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, v2, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_f32_commute:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v3, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v1, v2, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_f32_commute:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v3, v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v1, v2, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_f32_commute:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_minimum3_f32 v0, v2, v0, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
%max1 = call float @llvm.minimum.f32(float %c, float %max0)
ret float %max1
@@ -70,21 +84,30 @@ define amdgpu_ps i32 @s_fminimum3_f32(float inreg %a, float inreg %b, float inre
; GFX12-NEXT: v_readfirstlane_b32 s0, v0
; GFX12-NEXT: ; return to shader part epilog
;
-; GFX9-LABEL: s_fminimum3_f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: v_mov_b32_e32 v0, s1
-; GFX9-NEXT: v_min_f32_e32 v1, s0, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: v_min_f32_e32 v1, s2, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_readfirstlane_b32 s0, v0
-; GFX9-NEXT: ; return to shader part epilog
+; GFX940-LABEL: s_fminimum3_f32:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: v_mov_b32_e32 v0, s1
+; GFX940-NEXT: v_min_f32_e32 v1, s0, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v1, s2, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_readfirstlane_b32 s0, v0
+; GFX940-NEXT: ; return to shader part epilog
+;
+; GFX950-LABEL: s_fminimum3_f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: v_mov_b32_e32 v0, s0
+; GFX950-NEXT: v_minimum3_f32 v0, v0, s1, s1
+; GFX950-NEXT: v_minimum3_f32 v0, v0, s2, s2
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_readfirstlane_b32 s0, v0
+; GFX950-NEXT: ; return to shader part epilog
%max0 = call float @llvm.minimum.f32(float %a, float %b)
%max1 = call float @llvm.minimum.f32(float %max0, float %c)
%cast = bitcast float %max1 to i32
@@ -103,19 +126,26 @@ define float @v_fminimum3_f32_fabs0(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, |v0|, v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_f32_fabs0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e64 v3, |v0|, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v1, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_f32_fabs0:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e64 v3, |v0|, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v1, v0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_f32_fabs0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, |v0|, v1, v1
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
%max0 = call float @llvm.minimum.f32(float %a.fabs, float %b)
%max1 = call float @llvm.minimum.f32(float %max0, float %c)
@@ -133,19 +163,26 @@ define float @v_fminimum3_f32_fabs1(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, |v1|, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_f32_fabs1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e64 v3, v0, |v1|
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v1, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_f32_fabs1:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e64 v3, v0, |v1|
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v1, v0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_f32_fabs1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, v0, |v1|, |v1|
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%b.fabs = call float @llvm.fabs.f32(float %b)
%max0 = call float @llvm.minimum.f32(float %a, float %b.fabs)
%max1 = call float @llvm.minimum.f32(float %max0, float %c)
@@ -163,19 +200,26 @@ define float @v_fminimum3_f32_fabs2(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, |v2|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_f32_fabs2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v3, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_min_f32_e64 v1, v0, |v2|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_f32_fabs2:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v3, v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_min_f32_e64 v1, v0, |v2|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_f32_fabs2:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_minimum3_f32 v0, v0, |v2|, |v2|
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%c.fabs = call float @llvm.fabs.f32(float %c)
%max0 = call float @llvm.minimum.f32(float %a, float %b)
%max1 = call float @llvm.minimum.f32(float %max0, float %c.fabs)
@@ -193,19 +237,26 @@ define float @v_fminimum3_f32_fabs_all(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, |v0|, |v1|, |v2|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_f32_fabs_all:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e64 v3, |v0|, |v1|
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_min_f32_e64 v1, v0, |v2|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_f32_fabs_all:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e64 v3, |v0|, |v1|
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_min_f32_e64 v1, v0, |v2|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_f32_fabs_all:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, |v0|, |v1|, |v1|
+; GFX950-NEXT: v_minimum3_f32 v0, v0, |v2|, |v2|
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
%b.fabs = call float @llvm.fabs.f32(float %b)
%c.fabs = call float @llvm.fabs.f32(float %c)
@@ -225,19 +276,26 @@ define float @v_fminimum3_f32_fneg_all(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, -v0, -v1, -v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_f32_fneg_all:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e64 v3, -v0, -v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_min_f32_e64 v1, v0, -v2
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_f32_fneg_all:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e64 v3, -v0, -v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_min_f32_e64 v1, v0, -v2
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_f32_fneg_all:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, -v0, -v1, -v1
+; GFX950-NEXT: v_minimum3_f32 v0, v0, -v2, -v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
%b.fneg = fneg float %b
%c.fneg = fneg float %c
@@ -257,19 +315,26 @@ define float @v_fminimum3_f32_fneg_fabs_all(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, -|v0|, -|v1|, -|v2|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_f32_fneg_fabs_all:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e64 v3, -|v0|, -|v1|
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_min_f32_e64 v1, v0, -|v2|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_f32_fneg_fabs_all:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e64 v3, -|v0|, -|v1|
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_min_f32_e64 v1, v0, -|v2|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_f32_fneg_fabs_all:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, -|v0|, -|v1|, -|v1|
+; GFX950-NEXT: v_minimum3_f32 v0, v0, -|v2|, -|v2|
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
%b.fabs = call float @llvm.fabs.f32(float %b)
%c.fabs = call float @llvm.fabs.f32(float %c)
@@ -292,19 +357,26 @@ define float @v_fminimum3_f32_fneg0(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, -v0, v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_f32_fneg0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e64 v3, -v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v1, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_f32_fneg0:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e64 v3, -v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v1, v0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_f32_fneg0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, -v0, v1, v1
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
%max0 = call float @llvm.minimum.f32(float %a.fneg, float %b)
%max1 = call float @llvm.minimum.f32(float %max0, float %c)
@@ -322,19 +394,26 @@ define float @v_fminimum3_f32_fneg1(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, -v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_f32_fneg1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e64 v3, v0, -v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v1, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_f32_fneg1:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e64 v3, v0, -v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v1, v0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_f32_fneg1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, v0, -v1, -v1
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%b.fneg = fneg float %b
%max0 = call float @llvm.minimum.f32(float %a, float %b.fneg)
%max1 = call float @llvm.minimum.f32(float %max0, float %c)
@@ -352,19 +431,26 @@ define float @v_fminimum3_f32_fneg2(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, -v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_f32_fneg2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v3, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_min_f32_e64 v1, v0, -v2
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_f32_fneg2:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v3, v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_min_f32_e64 v1, v0, -v2
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_f32_fneg2:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_minimum3_f32 v0, v0, -v2, -v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%c.fneg = fneg float %c
%max0 = call float @llvm.minimum.f32(float %a, float %b)
%max1 = call float @llvm.minimum.f32(float %max0, float %c.fneg)
@@ -382,19 +468,27 @@ define float @v_fminimum3_f32_const0(float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, 0x41000000, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_f32_const0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v2, 0x41000000, v0
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_f32_const0:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v2, 0x41000000, v0
+; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_f32_const0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: s_mov_b32 s0, 0x41000000
+; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, s0
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float 8.0, float %b)
%max1 = call float @llvm.minimum.f32(float %max0, float %c)
ret float %max1
@@ -411,19 +505,27 @@ define float @v_fminimum3_f32__const2(float %a, float %b) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, 0x41000000
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_f32__const2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: v_min_f32_e32 v1, 0x41000000, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_f32__const2:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v1, 0x41000000, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_f32__const2:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: s_mov_b32 s0, 0x41000000
+; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
%max1 = call float @llvm.minimum.f32(float %max0, float 8.0)
ret float %max1
@@ -440,19 +542,26 @@ define float @v_fminimum3_f32_inlineimm0(float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, 4.0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_f32_inlineimm0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v0
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_f32_inlineimm0:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v2, 4.0, v0
+; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_f32_inlineimm0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, v0, 4.0, 4.0
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float 4.0, float %b)
%max1 = call float @llvm.minimum.f32(float %max0, float %c)
ret float %max1
@@ -469,19 +578,26 @@ define float @v_fminimum3_f32__inlineimm(float %a, float %b) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_f32__inlineimm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: v_min_f32_e32 v1, 4.0, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_f32__inlineimm:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v1, 4.0, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_f32__inlineimm:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_minimum3_f32 v0, v0, 4.0, 4.0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
%max1 = call float @llvm.minimum.f32(float %max0, float 4.0)
ret float %max1
@@ -500,19 +616,28 @@ define float @v_fminimum3_f32_const1_const2(float %a) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, s0, 0x41800000
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_f32_const1_const2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v1, 0x41000000, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: v_min_f32_e32 v1, 0x41800000, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_f32_const1_const2:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v1, 0x41000000, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v1, 0x41800000, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_f32_const1_const2:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: s_mov_b32 s0, 0x41000000
+; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, s0
+; GFX950-NEXT: s_mov_b32 s0, 0x41800000
+; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float 8.0)
%max1 = call float @llvm.minimum.f32(float %max0, float 16.0)
ret float %max1
@@ -530,27 +655,36 @@ define <2 x float> @v_fminimum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float
; GFX12-NEXT: v_minimum3_f32 v1, v5, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_v2f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v6, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_min_f32_e32 v3, v0, v2
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v2, v4, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_min_f32_e32 v2, v5, v1
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v2f32:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v6, v1, v3
+; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX940-NEXT: v_min_f32_e32 v3, v0, v2
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v2, v4, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v2, v5, v1
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v2f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_minimum3_f32 v0, v4, v0, v0
+; GFX950-NEXT: v_minimum3_f32 v1, v5, v1, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %c, <2 x float> %max0)
ret <2 x float> %max1
@@ -568,27 +702,36 @@ define <2 x float> @v_fminimum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2
; GFX12-NEXT: v_minimum3_f32 v1, v1, v3, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_v2f32_commute:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v6, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_min_f32_e32 v3, v0, v2
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v2, v0, v4
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_min_f32_e32 v2, v1, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v2f32_commute:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v6, v1, v3
+; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX940-NEXT: v_min_f32_e32 v3, v0, v2
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v2, v0, v4
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v2, v1, v5
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v2f32_commute:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v4, v4
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v5, v5
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %max0, <2 x float> %c)
ret <2 x float> %max1
@@ -606,27 +749,36 @@ define <2 x float> @v_fminimum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b,
; GFX12-NEXT: v_minimum3_f32 v1, |v1|, |v3|, |v5|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_v2f32__fabs_all:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e64 v6, |v1|, |v3|
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3|
-; GFX9-NEXT: v_min_f32_e64 v3, |v0|, |v2|
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX9-NEXT: v_min_f32_e64 v2, v0, |v4|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_min_f32_e64 v2, v1, |v5|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v2f32__fabs_all:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e64 v6, |v1|, |v3|
+; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3|
+; GFX940-NEXT: v_min_f32_e64 v3, |v0|, |v2|
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX940-NEXT: v_min_f32_e64 v2, v0, |v4|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT: v_min_f32_e64 v2, v1, |v5|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v2f32__fabs_all:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v1, |v1|, |v3|, |v3|
+; GFX950-NEXT: v_minimum3_f32 v0, |v0|, |v2|, |v2|
+; GFX950-NEXT: v_minimum3_f32 v0, v0, |v4|, |v4|
+; GFX950-NEXT: v_minimum3_f32 v1, v1, |v5|, |v5|
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
%b.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
%c.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %c)
@@ -647,27 +799,36 @@ define <2 x float> @v_fminimum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b,
; GFX12-NEXT: v_minimum3_f32 v1, -v1, -v3, -v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_v2f32__fneg_all:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e64 v6, -v1, -v3
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3
-; GFX9-NEXT: v_min_f32_e64 v3, -v0, -v2
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX9-NEXT: v_min_f32_e64 v2, v0, -v4
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_min_f32_e64 v2, v1, -v5
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v2f32__fneg_all:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e64 v6, -v1, -v3
+; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3
+; GFX940-NEXT: v_min_f32_e64 v3, -v0, -v2
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX940-NEXT: v_min_f32_e64 v2, v0, -v4
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT: v_min_f32_e64 v2, v1, -v5
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v2f32__fneg_all:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v1, -v1, -v3, -v3
+; GFX950-NEXT: v_minimum3_f32 v0, -v0, -v2, -v2
+; GFX950-NEXT: v_minimum3_f32 v0, v0, -v4, -v4
+; GFX950-NEXT: v_minimum3_f32 v1, v1, -v5, -v5
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <2 x float> %a
%b.fneg = fneg <2 x float> %b
%c.fneg = fneg <2 x float> %c
@@ -688,27 +849,36 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c
; GFX12-NEXT: v_minimum3_f32 v1, v1, 2.0, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_v2f32__inlineimm1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v4, 2.0, v1
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_min_f32_e32 v4, 2.0, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_min_f32_e32 v4, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_min_f32_e32 v2, v1, v3
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v2f32__inlineimm1:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v4, 2.0, v1
+; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX940-NEXT: v_min_f32_e32 v4, 2.0, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX940-NEXT: v_min_f32_e32 v4, v0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: v_min_f32_e32 v2, v1, v3
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v2f32__inlineimm1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v1, v1, 2.0, 2.0
+; GFX950-NEXT: v_minimum3_f32 v0, v0, 2.0, 2.0
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> <float 2.0, float 2.0>)
%max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %max0, <2 x float> %c)
ret <2 x float> %max1
@@ -726,27 +896,36 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b
; GFX12-NEXT: v_minimum3_f32 v1, v1, v3, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_v2f32__inlineimm2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v4, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_min_f32_e32 v3, v0, v2
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v1
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v2f32__inlineimm2:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v4, v1, v3
+; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX940-NEXT: v_min_f32_e32 v3, v0, v2
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v2, 4.0, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v2, 4.0, v1
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v2f32__inlineimm2:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_minimum3_f32 v0, v0, 4.0, 4.0
+; GFX950-NEXT: v_minimum3_f32 v1, v1, 4.0, 4.0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %max0, <2 x float> <float 4.0, float 4.0>)
ret <2 x float> %max1
@@ -765,35 +944,46 @@ define <3 x float> @v_fminimum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float
; GFX12-NEXT: v_minimum3_f32 v2, v8, v2, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_v3f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v9, v2, v5
-; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_min_f32_e32 v5, v1, v4
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_min_f32_e32 v4, v0, v3
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, v6, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, v7, v1
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, v8, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v3f32:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v9, v2, v5
+; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX940-NEXT: v_min_f32_e32 v5, v1, v4
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX940-NEXT: v_min_f32_e32 v4, v0, v3
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX940-NEXT: v_min_f32_e32 v3, v6, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v3, v7, v1
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v3, v8, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v8, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v3f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3
+; GFX950-NEXT: v_minimum3_f32 v0, v6, v0, v0
+; GFX950-NEXT: v_minimum3_f32 v1, v7, v1, v1
+; GFX950-NEXT: v_minimum3_f32 v2, v8, v2, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %c, <3 x float> %max0)
ret <3 x float> %max1
@@ -812,35 +1002,46 @@ define <3 x float> @v_fminimum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3
; GFX12-NEXT: v_minimum3_f32 v2, v2, v5, v8
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_v3f32_commute:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v9, v2, v5
-; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_min_f32_e32 v5, v1, v4
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_min_f32_e32 v4, v0, v3
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, v0, v6
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v6
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, v1, v7
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v7
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, v2, v8
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v8
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v3f32_commute:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v9, v2, v5
+; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX940-NEXT: v_min_f32_e32 v5, v1, v4
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX940-NEXT: v_min_f32_e32 v4, v0, v3
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX940-NEXT: v_min_f32_e32 v3, v0, v6
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v6
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v3, v1, v7
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v7
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v3, v2, v8
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v8
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v3f32_commute:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v6, v6
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v7, v7
+; GFX950-NEXT: v_minimum3_f32 v2, v2, v8, v8
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %max0, <3 x float> %c)
ret <3 x float> %max1
@@ -859,35 +1060,46 @@ define <3 x float> @v_fminimum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b,
; GFX12-NEXT: v_minimum3_f32 v2, |v2|, |v5|, |v8|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_v3f32__fabs_all:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e64 v9, |v2|, |v5|
-; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5|
-; GFX9-NEXT: v_min_f32_e64 v5, |v1|, |v4|
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4|
-; GFX9-NEXT: v_min_f32_e64 v4, |v0|, |v3|
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX9-NEXT: v_min_f32_e64 v3, v0, |v6|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX9-NEXT: v_min_f32_e64 v3, v1, |v7|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX9-NEXT: v_min_f32_e64 v3, v2, |v8|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v3f32__fabs_all:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e64 v9, |v2|, |v5|
+; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5|
+; GFX940-NEXT: v_min_f32_e64 v5, |v1|, |v4|
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4|
+; GFX940-NEXT: v_min_f32_e64 v4, |v0|, |v3|
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX940-NEXT: v_min_f32_e64 v3, v0, |v6|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX940-NEXT: v_min_f32_e64 v3, v1, |v7|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX940-NEXT: v_min_f32_e64 v3, v2, |v8|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v3f32__fabs_all:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v2, |v2|, |v5|, |v5|
+; GFX950-NEXT: v_minimum3_f32 v1, |v1|, |v4|, |v4|
+; GFX950-NEXT: v_minimum3_f32 v0, |v0|, |v3|, |v3|
+; GFX950-NEXT: v_minimum3_f32 v0, v0, |v6|, |v6|
+; GFX950-NEXT: v_minimum3_f32 v1, v1, |v7|, |v7|
+; GFX950-NEXT: v_minimum3_f32 v2, v2, |v8|, |v8|
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a)
%b.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %b)
%c.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %c)
@@ -909,35 +1121,46 @@ define <3 x float> @v_fminimum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b,
; GFX12-NEXT: v_minimum3_f32 v2, -v2, -v5, -v8
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_v3f32__fneg_all:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e64 v9, -v2, -v5
-; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5
-; GFX9-NEXT: v_min_f32_e64 v5, -v1, -v4
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4
-; GFX9-NEXT: v_min_f32_e64 v4, -v0, -v3
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX9-NEXT: v_min_f32_e64 v3, v0, -v6
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX9-NEXT: v_min_f32_e64 v3, v1, -v7
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX9-NEXT: v_min_f32_e64 v3, v2, -v8
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v3f32__fneg_all:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e64 v9, -v2, -v5
+; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5
+; GFX940-NEXT: v_min_f32_e64 v5, -v1, -v4
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4
+; GFX940-NEXT: v_min_f32_e64 v4, -v0, -v3
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX940-NEXT: v_min_f32_e64 v3, v0, -v6
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX940-NEXT: v_min_f32_e64 v3, v1, -v7
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX940-NEXT: v_min_f32_e64 v3, v2, -v8
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v3f32__fneg_all:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v2, -v2, -v5, -v5
+; GFX950-NEXT: v_minimum3_f32 v1, -v1, -v4, -v4
+; GFX950-NEXT: v_minimum3_f32 v0, -v0, -v3, -v3
+; GFX950-NEXT: v_minimum3_f32 v0, v0, -v6, -v6
+; GFX950-NEXT: v_minimum3_f32 v1, v1, -v7, -v7
+; GFX950-NEXT: v_minimum3_f32 v2, v2, -v8, -v8
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <3 x float> %a
%b.fneg = fneg <3 x float> %b
%c.fneg = fneg <3 x float> %c
@@ -959,35 +1182,46 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c
; GFX12-NEXT: v_minimum3_f32 v2, v2, 2.0, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_v3f32__inlineimm1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v2
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
-; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v1
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_min_f32_e32 v6, v0, v3
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_min_f32_e32 v3, v1, v4
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, v2, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v3f32__inlineimm1:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v6, 2.0, v2
+; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
+; GFX940-NEXT: v_min_f32_e32 v6, 2.0, v1
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX940-NEXT: v_min_f32_e32 v6, 2.0, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX940-NEXT: v_min_f32_e32 v6, v0, v3
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX940-NEXT: v_min_f32_e32 v3, v1, v4
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v3, v2, v5
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v3f32__inlineimm1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v2, v2, 2.0, 2.0
+; GFX950-NEXT: v_minimum3_f32 v1, v1, 2.0, 2.0
+; GFX950-NEXT: v_minimum3_f32 v0, v0, 2.0, 2.0
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4
+; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> <float 2.0, float 2.0, float 2.0>)
%max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %max0, <3 x float> %c)
ret <3 x float> %max1
@@ -1006,35 +1240,46 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b
; GFX12-NEXT: v_minimum3_f32 v2, v2, v5, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fminimum3_v3f32__inlineimm2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v6, v2, v5
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_min_f32_e32 v5, v1, v4
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_min_f32_e32 v4, v0, v3
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v1
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v3f32__inlineimm2:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v6, v2, v5
+; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX940-NEXT: v_min_f32_e32 v5, v1, v4
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX940-NEXT: v_min_f32_e32 v4, v0, v3
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc
+; GFX940-NEXT: v_min_f32_e32 v3, 4.0, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v3, 4.0, v1
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v3, 4.0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v3f32__inlineimm2:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3
+; GFX950-NEXT: v_minimum3_f32 v0, v0, 4.0, 4.0
+; GFX950-NEXT: v_minimum3_f32 v1, v1, 4.0, 4.0
+; GFX950-NEXT: v_minimum3_f32 v2, v2, 4.0, 4.0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %max0, <3 x float> <float 4.0, float 4.0, float 4.0>)
ret <3 x float> %max1
@@ -3165,19 +3410,26 @@ define <2 x float> @v_no_fminimum3_f32__multi_use(float %a, float %b, float %c)
; GFX12-NEXT: v_minimum_f32 v1, v0, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_no_fminimum3_f32__multi_use:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v3, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v1, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_no_fminimum3_f32__multi_use:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v3, v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v1, v0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_no_fminimum3_f32__multi_use:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_minimum3_f32 v1, v0, v2, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
%max1 = call float @llvm.minimum.f32(float %max0, float %c)
%insert.0 = insertelement <2 x float> poison, float %max0, i32 0
@@ -3193,22 +3445,31 @@ define amdgpu_ps <2 x i32> @s_no_fminimum3_f32__multi_use(float inreg %a, float
; GFX12-NEXT: s_minimum_f32 s1, s0, s2
; GFX12-NEXT: ; return to shader part epilog
;
-; GFX9-LABEL: s_no_fminimum3_f32__multi_use:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: v_mov_b32_e32 v0, s1
-; GFX9-NEXT: v_min_f32_e32 v1, s0, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: v_min_f32_e32 v1, s2, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
-; GFX9-NEXT: v_readfirstlane_b32 s0, v0
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_readfirstlane_b32 s1, v1
-; GFX9-NEXT: ; return to shader part epilog
+; GFX940-LABEL: s_no_fminimum3_f32__multi_use:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: v_mov_b32_e32 v0, s1
+; GFX940-NEXT: v_min_f32_e32 v1, s0, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v1, s2, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
+; GFX940-NEXT: v_readfirstlane_b32 s0, v0
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_readfirstlane_b32 s1, v1
+; GFX940-NEXT: ; return to shader part epilog
+;
+; GFX950-LABEL: s_no_fminimum3_f32__multi_use:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: v_mov_b32_e32 v0, s0
+; GFX950-NEXT: v_minimum3_f32 v0, v0, s1, s1
+; GFX950-NEXT: v_minimum3_f32 v1, v0, s2, s2
+; GFX950-NEXT: v_readfirstlane_b32 s0, v0
+; GFX950-NEXT: v_readfirstlane_b32 s1, v1
+; GFX950-NEXT: ; return to shader part epilog
%max0 = call float @llvm.minimum.f32(float %a, float %b)
%max1 = call float @llvm.minimum.f32(float %max0, float %c)
%cast0 = bitcast float %max0 to i32
@@ -3372,6 +3633,3 @@ define <2 x double> @v_no_fminimum3_f64__multi_use(double %a, double %b, double
%insert.1 = insertelement <2 x double> %insert.0, double %max1, i32 1
ret <2 x double> %insert.1
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX940: {{.*}}
-; GFX950: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
index c1fdfa2c4cf9ab..df7355c2c57bfa 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
@@ -39,11 +39,7 @@ define float @v_maximum_f32(float %src0, float %src1) {
; GFX950-LABEL: v_maximum_f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f32:
@@ -89,11 +85,17 @@ define float @v_maximum_f32__nnan(float %src0, float %src1) {
; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f32__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f32__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f32__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f32__nnan:
; GFX10: ; %bb.0:
@@ -151,11 +153,7 @@ define float @v_maximum_f32__nsz(float %src0, float %src1) {
; GFX950-LABEL: v_maximum_f32__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f32__nsz:
@@ -201,11 +199,17 @@ define float @v_maximum_f32__nnan_nsz(float %src0, float %src1) {
; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f32__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f32__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f32__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f32__nnan_nsz:
; GFX10: ; %bb.0:
@@ -267,11 +271,7 @@ define float @v_maximum_f32__nnan_src0(float %arg0, float %src1) {
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_add_f32_e32 v0, 1.0, v0
-; GFX950-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f32__nnan_src0:
@@ -344,11 +344,7 @@ define float @v_maximum_f32__nnan_src1(float %src0, float %arg1) {
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_add_f32_e32 v1, 1.0, v1
-; GFX950-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f32__nnan_src1:
@@ -429,12 +425,8 @@ define void @s_maximum_f32(float inreg %src0, float inreg %src1) {
; GFX950-LABEL: s_maximum_f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_mov_b32_e32 v0, s1
-; GFX950-NEXT: v_max_f32_e32 v1, s0, v0
-; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX950-NEXT: v_mov_b32_e32 v0, s0
+; GFX950-NEXT: v_maximum3_f32 v0, v0, s1, s1
; GFX950-NEXT: ;;#ASMSTART
; GFX950-NEXT: ; use v0
; GFX950-NEXT: ;;#ASMEND
@@ -521,15 +513,8 @@ define <2 x float> @v_maximum_v2f32(<2 x float> %src0, <2 x float> %src1) {
; GFX950-LABEL: v_maximum_v2f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f32_e32 v4, v0, v2
-; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX950-NEXT: v_max_f32_e32 v2, v1, v3
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f32:
@@ -583,12 +568,19 @@ define <2 x float> @v_maximum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1)
; GFX8-NEXT: v_max_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v2f32__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f32__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f32__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f32__nnan:
; GFX10: ; %bb.0:
@@ -657,15 +649,8 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX950-LABEL: v_maximum_v2f32__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f32_e32 v4, v0, v2
-; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX950-NEXT: v_max_f32_e32 v2, v1, v3
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f32__nsz:
@@ -719,12 +704,19 @@ define <2 x float> @v_maximum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr
; GFX8-NEXT: v_max_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v2f32__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f32__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f32__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f32__nnan_nsz:
; GFX10: ; %bb.0:
@@ -808,16 +800,10 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX950-LABEL: s_maximum_v2f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_mov_b32_e32 v0, s3
-; GFX950-NEXT: v_max_f32_e32 v1, s1, v0
-; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s1, v0
-; GFX950-NEXT: v_mov_b32_e32 v0, s2
-; GFX950-NEXT: v_max_f32_e32 v3, s0, v0
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX950-NEXT: v_mov_b32_e32 v0, s1
+; GFX950-NEXT: v_maximum3_f32 v1, v0, s3, s3
+; GFX950-NEXT: v_mov_b32_e32 v0, s0
+; GFX950-NEXT: v_maximum3_f32 v0, v0, s2, s2
; GFX950-NEXT: ;;#ASMSTART
; GFX950-NEXT: ; use v[0:1]
; GFX950-NEXT: ;;#ASMEND
@@ -920,19 +906,9 @@ define <3 x float> @v_maximum_v3f32(<3 x float> %src0, <3 x float> %src1) {
; GFX950-LABEL: v_maximum_v3f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f32_e32 v6, v0, v3
-; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX950-NEXT: v_max_f32_e32 v3, v1, v4
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX950-NEXT: v_max_f32_e32 v3, v2, v5
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4
+; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f32:
@@ -995,13 +971,21 @@ define <3 x float> @v_maximum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1)
; GFX8-NEXT: v_max_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v3f32__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v5
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f32__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX900-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f32__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4
+; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f32__nnan:
; GFX10: ; %bb.0:
@@ -1082,19 +1066,9 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX950-LABEL: v_maximum_v3f32__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f32_e32 v6, v0, v3
-; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX950-NEXT: v_max_f32_e32 v3, v1, v4
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX950-NEXT: v_max_f32_e32 v3, v2, v5
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4
+; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f32__nsz:
@@ -1157,13 +1131,21 @@ define <3 x float> @v_maximum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr
; GFX8-NEXT: v_max_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v3f32__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v5
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f32__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX900-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f32__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4
+; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f32__nnan_nsz:
; GFX10: ; %bb.0:
@@ -1253,23 +1235,10 @@ define <4 x float> @v_maximum_v4f32(<4 x float> %src0, <4 x float> %src1) {
; GFX950-LABEL: v_maximum_v4f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f32_e32 v8, v0, v4
-; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX950-NEXT: v_max_f32_e32 v4, v1, v5
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX950-NEXT: v_max_f32_e32 v4, v2, v6
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX950-NEXT: v_max_f32_e32 v4, v3, v7
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5
+; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6
+; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f32:
@@ -1341,14 +1310,23 @@ define <4 x float> @v_maximum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1)
; GFX8-NEXT: v_max_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v4f32__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v5
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v6
-; GFX9-NEXT: v_max_f32_e32 v3, v3, v7
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f32__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX900-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX900-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f32__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5
+; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6
+; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f32__nnan:
; GFX10: ; %bb.0:
@@ -1440,23 +1418,10 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX950-LABEL: v_maximum_v4f32__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f32_e32 v8, v0, v4
-; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX950-NEXT: v_max_f32_e32 v4, v1, v5
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX950-NEXT: v_max_f32_e32 v4, v2, v6
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX950-NEXT: v_max_f32_e32 v4, v3, v7
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5
+; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6
+; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f32__nsz:
@@ -1528,14 +1493,23 @@ define <4 x float> @v_maximum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr
; GFX8-NEXT: v_max_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v4f32__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v5
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v6
-; GFX9-NEXT: v_max_f32_e32 v3, v3, v7
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f32__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX900-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX900-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f32__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5
+; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6
+; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f32__nnan_nsz:
; GFX10: ; %bb.0:
@@ -1663,39 +1637,14 @@ define <8 x float> @v_maximum_v8f32(<8 x float> %src0, <8 x float> %src1) {
; GFX950-LABEL: v_maximum_v8f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f32_e32 v16, v0, v8
-; GFX950-NEXT: v_mov_b32_e32 v17, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX950-NEXT: v_max_f32_e32 v8, v1, v9
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
-; GFX950-NEXT: v_max_f32_e32 v8, v2, v10
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
-; GFX950-NEXT: v_max_f32_e32 v8, v3, v11
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
-; GFX950-NEXT: v_max_f32_e32 v8, v4, v12
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
-; GFX950-NEXT: v_max_f32_e32 v8, v5, v13
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
-; GFX950-NEXT: v_max_f32_e32 v8, v6, v14
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
-; GFX950-NEXT: v_max_f32_e32 v8, v7, v15
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v8, v8
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v9, v9
+; GFX950-NEXT: v_maximum3_f32 v2, v2, v10, v10
+; GFX950-NEXT: v_maximum3_f32 v3, v3, v11, v11
+; GFX950-NEXT: v_maximum3_f32 v4, v4, v12, v12
+; GFX950-NEXT: v_maximum3_f32 v5, v5, v13, v13
+; GFX950-NEXT: v_maximum3_f32 v6, v6, v14, v14
+; GFX950-NEXT: v_maximum3_f32 v7, v7, v15, v15
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v8f32:
@@ -1980,64 +1929,23 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: scratch_load_dword v31, off, s32
-; GFX950-NEXT: v_mov_b32_e32 v32, 0x7fc00000
-; GFX950-NEXT: v_max_f32_e32 v33, v0, v16
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX950-NEXT: v_max_f32_e32 v34, v1, v17
-; GFX950-NEXT: v_max_f32_e32 v35, v2, v18
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX950-NEXT: v_max_f32_e32 v36, v3, v19
-; GFX950-NEXT: v_max_f32_e32 v37, v4, v20
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX950-NEXT: v_max_f32_e32 v38, v5, v21
-; GFX950-NEXT: v_max_f32_e32 v39, v6, v22
-; GFX950-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX950-NEXT: v_max_f32_e32 v48, v7, v23
-; GFX950-NEXT: v_max_f32_e32 v49, v8, v24
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX950-NEXT: v_max_f32_e32 v50, v9, v25
-; GFX950-NEXT: v_max_f32_e32 v51, v10, v26
-; GFX950-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX950-NEXT: v_max_f32_e32 v52, v11, v27
-; GFX950-NEXT: v_max_f32_e32 v53, v12, v28
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX950-NEXT: v_max_f32_e32 v54, v13, v29
-; GFX950-NEXT: v_max_f32_e32 v55, v14, v30
-; GFX950-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v16, v16
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v17, v17
+; GFX950-NEXT: v_maximum3_f32 v2, v2, v18, v18
+; GFX950-NEXT: v_maximum3_f32 v3, v3, v19, v19
+; GFX950-NEXT: v_maximum3_f32 v4, v4, v20, v20
+; GFX950-NEXT: v_maximum3_f32 v5, v5, v21, v21
+; GFX950-NEXT: v_maximum3_f32 v6, v6, v22, v22
+; GFX950-NEXT: v_maximum3_f32 v7, v7, v23, v23
+; GFX950-NEXT: v_maximum3_f32 v8, v8, v24, v24
+; GFX950-NEXT: v_maximum3_f32 v9, v9, v25, v25
+; GFX950-NEXT: v_maximum3_f32 v10, v10, v26, v26
+; GFX950-NEXT: v_maximum3_f32 v11, v11, v27, v27
+; GFX950-NEXT: v_maximum3_f32 v12, v12, v28, v28
+; GFX950-NEXT: v_maximum3_f32 v13, v13, v29, v29
+; GFX950-NEXT: v_maximum3_f32 v14, v14, v30, v30
; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_max_f32_e32 v16, v15, v31
-; GFX950-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v15, v31
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc
+; GFX950-NEXT: v_maximum3_f32 v15, v15, v31, v31
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v16f32:
@@ -2176,3 +2084,4 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
+; GFX9: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
index 2614fb3bf9f737..956de6de3aad3b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
@@ -39,11 +39,7 @@ define float @v_minimum_f32(float %src0, float %src1) {
; GFX950-LABEL: v_minimum_f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f32:
@@ -89,11 +85,17 @@ define float @v_minimum_f32__nnan(float %src0, float %src1) {
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f32__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f32__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f32__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f32__nnan:
; GFX10: ; %bb.0:
@@ -151,11 +153,7 @@ define float @v_minimum_f32__nsz(float %src0, float %src1) {
; GFX950-LABEL: v_minimum_f32__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f32__nsz:
@@ -201,11 +199,17 @@ define float @v_minimum_f32__nnan_nsz(float %src0, float %src1) {
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f32__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f32__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f32__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f32__nnan_nsz:
; GFX10: ; %bb.0:
@@ -267,11 +271,7 @@ define float @v_minimum_f32__nnan_src0(float %arg0, float %src1) {
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_add_f32_e32 v0, 1.0, v0
-; GFX950-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f32__nnan_src0:
@@ -344,11 +344,7 @@ define float @v_minimum_f32__nnan_src1(float %src0, float %arg1) {
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_add_f32_e32 v1, 1.0, v1
-; GFX950-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f32__nnan_src1:
@@ -429,12 +425,8 @@ define void @s_minimum_f32(float inreg %src0, float inreg %src1) {
; GFX950-LABEL: s_minimum_f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_mov_b32_e32 v0, s1
-; GFX950-NEXT: v_min_f32_e32 v1, s0, v0
-; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX950-NEXT: v_mov_b32_e32 v0, s0
+; GFX950-NEXT: v_minimum3_f32 v0, v0, s1, s1
; GFX950-NEXT: ;;#ASMSTART
; GFX950-NEXT: ; use v0
; GFX950-NEXT: ;;#ASMEND
@@ -521,15 +513,8 @@ define <2 x float> @v_minimum_v2f32(<2 x float> %src0, <2 x float> %src1) {
; GFX950-LABEL: v_minimum_v2f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f32_e32 v4, v0, v2
-; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX950-NEXT: v_min_f32_e32 v2, v1, v3
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f32:
@@ -583,12 +568,19 @@ define <2 x float> @v_minimum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1)
; GFX8-NEXT: v_min_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v2f32__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f32__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX900-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f32__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f32__nnan:
; GFX10: ; %bb.0:
@@ -657,15 +649,8 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX950-LABEL: v_minimum_v2f32__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f32_e32 v4, v0, v2
-; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX950-NEXT: v_min_f32_e32 v2, v1, v3
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f32__nsz:
@@ -719,12 +704,19 @@ define <2 x float> @v_minimum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr
; GFX8-NEXT: v_min_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v2f32__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f32__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX900-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f32__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f32__nnan_nsz:
; GFX10: ; %bb.0:
@@ -808,16 +800,10 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX950-LABEL: s_minimum_v2f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_mov_b32_e32 v0, s3
-; GFX950-NEXT: v_min_f32_e32 v1, s1, v0
-; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s1, v0
-; GFX950-NEXT: v_mov_b32_e32 v0, s2
-; GFX950-NEXT: v_min_f32_e32 v3, s0, v0
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX950-NEXT: v_mov_b32_e32 v0, s1
+; GFX950-NEXT: v_minimum3_f32 v1, v0, s3, s3
+; GFX950-NEXT: v_mov_b32_e32 v0, s0
+; GFX950-NEXT: v_minimum3_f32 v0, v0, s2, s2
; GFX950-NEXT: ;;#ASMSTART
; GFX950-NEXT: ; use v[0:1]
; GFX950-NEXT: ;;#ASMEND
@@ -920,19 +906,9 @@ define <3 x float> @v_minimum_v3f32(<3 x float> %src0, <3 x float> %src1) {
; GFX950-LABEL: v_minimum_v3f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f32_e32 v6, v0, v3
-; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX950-NEXT: v_min_f32_e32 v3, v1, v4
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX950-NEXT: v_min_f32_e32 v3, v2, v5
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4
+; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f32:
@@ -995,13 +971,21 @@ define <3 x float> @v_minimum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1)
; GFX8-NEXT: v_min_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v3f32__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX9-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX9-NEXT: v_min_f32_e32 v2, v2, v5
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f32__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX900-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX900-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f32__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4
+; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f32__nnan:
; GFX10: ; %bb.0:
@@ -1082,19 +1066,9 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX950-LABEL: v_minimum_v3f32__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f32_e32 v6, v0, v3
-; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX950-NEXT: v_min_f32_e32 v3, v1, v4
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX950-NEXT: v_min_f32_e32 v3, v2, v5
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4
+; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f32__nsz:
@@ -1157,13 +1131,21 @@ define <3 x float> @v_minimum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr
; GFX8-NEXT: v_min_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v3f32__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX9-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX9-NEXT: v_min_f32_e32 v2, v2, v5
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f32__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX900-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX900-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f32__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4
+; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f32__nnan_nsz:
; GFX10: ; %bb.0:
@@ -1253,23 +1235,10 @@ define <4 x float> @v_minimum_v4f32(<4 x float> %src0, <4 x float> %src1) {
; GFX950-LABEL: v_minimum_v4f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f32_e32 v8, v0, v4
-; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX950-NEXT: v_min_f32_e32 v4, v1, v5
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX950-NEXT: v_min_f32_e32 v4, v2, v6
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX950-NEXT: v_min_f32_e32 v4, v3, v7
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v4, v4
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v5, v5
+; GFX950-NEXT: v_minimum3_f32 v2, v2, v6, v6
+; GFX950-NEXT: v_minimum3_f32 v3, v3, v7, v7
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f32:
@@ -1341,14 +1310,23 @@ define <4 x float> @v_minimum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1)
; GFX8-NEXT: v_min_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v4f32__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX9-NEXT: v_min_f32_e32 v1, v1, v5
-; GFX9-NEXT: v_min_f32_e32 v2, v2, v6
-; GFX9-NEXT: v_min_f32_e32 v3, v3, v7
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f32__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX900-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX900-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX900-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f32__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v4, v4
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v5, v5
+; GFX950-NEXT: v_minimum3_f32 v2, v2, v6, v6
+; GFX950-NEXT: v_minimum3_f32 v3, v3, v7, v7
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f32__nnan:
; GFX10: ; %bb.0:
@@ -1440,23 +1418,10 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX950-LABEL: v_minimum_v4f32__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f32_e32 v8, v0, v4
-; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX950-NEXT: v_min_f32_e32 v4, v1, v5
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX950-NEXT: v_min_f32_e32 v4, v2, v6
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX950-NEXT: v_min_f32_e32 v4, v3, v7
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v4, v4
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v5, v5
+; GFX950-NEXT: v_minimum3_f32 v2, v2, v6, v6
+; GFX950-NEXT: v_minimum3_f32 v3, v3, v7, v7
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f32__nsz:
@@ -1528,14 +1493,23 @@ define <4 x float> @v_minimum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr
; GFX8-NEXT: v_min_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v4f32__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX9-NEXT: v_min_f32_e32 v1, v1, v5
-; GFX9-NEXT: v_min_f32_e32 v2, v2, v6
-; GFX9-NEXT: v_min_f32_e32 v3, v3, v7
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f32__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX900-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX900-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX900-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f32__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v4, v4
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v5, v5
+; GFX950-NEXT: v_minimum3_f32 v2, v2, v6, v6
+; GFX950-NEXT: v_minimum3_f32 v3, v3, v7, v7
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f32__nnan_nsz:
; GFX10: ; %bb.0:
@@ -1663,39 +1637,14 @@ define <8 x float> @v_minimum_v8f32(<8 x float> %src0, <8 x float> %src1) {
; GFX950-LABEL: v_minimum_v8f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f32_e32 v16, v0, v8
-; GFX950-NEXT: v_mov_b32_e32 v17, 0x7fc00000
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX950-NEXT: v_min_f32_e32 v8, v1, v9
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
-; GFX950-NEXT: v_min_f32_e32 v8, v2, v10
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
-; GFX950-NEXT: v_min_f32_e32 v8, v3, v11
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
-; GFX950-NEXT: v_min_f32_e32 v8, v4, v12
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
-; GFX950-NEXT: v_min_f32_e32 v8, v5, v13
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
-; GFX950-NEXT: v_min_f32_e32 v8, v6, v14
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
-; GFX950-NEXT: v_min_f32_e32 v8, v7, v15
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v8, v8
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v9, v9
+; GFX950-NEXT: v_minimum3_f32 v2, v2, v10, v10
+; GFX950-NEXT: v_minimum3_f32 v3, v3, v11, v11
+; GFX950-NEXT: v_minimum3_f32 v4, v4, v12, v12
+; GFX950-NEXT: v_minimum3_f32 v5, v5, v13, v13
+; GFX950-NEXT: v_minimum3_f32 v6, v6, v14, v14
+; GFX950-NEXT: v_minimum3_f32 v7, v7, v15, v15
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v8f32:
@@ -1980,64 +1929,23 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: scratch_load_dword v31, off, s32
-; GFX950-NEXT: v_mov_b32_e32 v32, 0x7fc00000
-; GFX950-NEXT: v_min_f32_e32 v33, v0, v16
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX950-NEXT: v_min_f32_e32 v34, v1, v17
-; GFX950-NEXT: v_min_f32_e32 v35, v2, v18
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX950-NEXT: v_min_f32_e32 v36, v3, v19
-; GFX950-NEXT: v_min_f32_e32 v37, v4, v20
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX950-NEXT: v_min_f32_e32 v38, v5, v21
-; GFX950-NEXT: v_min_f32_e32 v39, v6, v22
-; GFX950-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX950-NEXT: v_min_f32_e32 v48, v7, v23
-; GFX950-NEXT: v_min_f32_e32 v49, v8, v24
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX950-NEXT: v_min_f32_e32 v50, v9, v25
-; GFX950-NEXT: v_min_f32_e32 v51, v10, v26
-; GFX950-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX950-NEXT: v_min_f32_e32 v52, v11, v27
-; GFX950-NEXT: v_min_f32_e32 v53, v12, v28
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX950-NEXT: v_min_f32_e32 v54, v13, v29
-; GFX950-NEXT: v_min_f32_e32 v55, v14, v30
-; GFX950-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v16, v16
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v17, v17
+; GFX950-NEXT: v_minimum3_f32 v2, v2, v18, v18
+; GFX950-NEXT: v_minimum3_f32 v3, v3, v19, v19
+; GFX950-NEXT: v_minimum3_f32 v4, v4, v20, v20
+; GFX950-NEXT: v_minimum3_f32 v5, v5, v21, v21
+; GFX950-NEXT: v_minimum3_f32 v6, v6, v22, v22
+; GFX950-NEXT: v_minimum3_f32 v7, v7, v23, v23
+; GFX950-NEXT: v_minimum3_f32 v8, v8, v24, v24
+; GFX950-NEXT: v_minimum3_f32 v9, v9, v25, v25
+; GFX950-NEXT: v_minimum3_f32 v10, v10, v26, v26
+; GFX950-NEXT: v_minimum3_f32 v11, v11, v27, v27
+; GFX950-NEXT: v_minimum3_f32 v12, v12, v28, v28
+; GFX950-NEXT: v_minimum3_f32 v13, v13, v29, v29
+; GFX950-NEXT: v_minimum3_f32 v14, v14, v30, v30
; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_min_f32_e32 v16, v15, v31
-; GFX950-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc
-; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v15, v31
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc
+; GFX950-NEXT: v_minimum3_f32 v15, v15, v31, v31
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v16f32:
@@ -2176,3 +2084,4 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
+; GFX9: {{.*}}
More information about the llvm-commits
mailing list