[clang] [llvm] AMDGPU: Copy correct predicates for SDWA reals (PR #116307)
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Thu Nov 14 17:34:54 PST 2024
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/116307
AMDGPU: Copy correct predicates for SDWA reals
The special-case predicate handling is messy in
several places. Currently, broad let blocks
override specific predicates with more general
ones. For instructions with SDWA, the HasSDWA
predicate was overriding the instruction's own
SubtargetPredicate.
This fixes enough to properly reject newer instructions
that support SDWA when assembling for older targets.
AMDGPU: Add gfx950 subtarget definitions
Mostly a stub, but adds some baseline tests and
tests for removed instructions.
>From 694f769398ed5511be85ac23e9dfc7f0a8e48054 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 13 Nov 2024 18:28:11 -0800
Subject: [PATCH 1/2] AMDGPU: Copy correct predicates for SDWA reals
The special-case predicate handling is messy in
several places. Currently, broad let blocks
override specific predicates with more general
ones. For instructions with SDWA, the HasSDWA
predicate was overriding the instruction's own
SubtargetPredicate.
This fixes enough to properly reject newer instructions
that support SDWA when assembling for older targets.
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 6 +++--
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 4 ++--
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 26 ++++++++++++----------
llvm/lib/Target/AMDGPU/VOPCInstructions.td | 2 +-
llvm/lib/Target/AMDGPU/VOPInstructions.td | 12 +++++-----
5 files changed, 27 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index c8ae010414dc40..d7feaef8c4a97d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2103,8 +2103,10 @@ def NotHasMinMaxDenormModes : Predicate<"!Subtarget->supportsMinMaxDenormModes()
def HasFminFmaxLegacy : Predicate<"Subtarget->hasFminFmaxLegacy()">;
-def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
- AssemblerPredicate<(all_of FeatureSDWA, FeatureVolcanicIslands)>;
+def HasSDWA : Predicate<"Subtarget->hasSDWA()">;
+
+def HasSDWA8 : Predicate<"Subtarget->hasSDWA()">,
+ AssemblerPredicate<(all_of (not FeatureGFX9Insts), FeatureSDWA)>;
def HasSDWA9 :
Predicate<"Subtarget->hasSDWA()">,
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index c743eb43e3465c..f7a66a08209397 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1268,7 +1268,7 @@ multiclass VOP1_Real_vi <bits<10> op> {
if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA then
def _sdwa_vi :
- VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOP_SDWA8_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
@@ -1474,7 +1474,7 @@ def : GCNPat <
// GFX9
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
+let DecoderNamespace = "GFX9" in {
multiclass VOP1_Real_gfx9 <bits<10> op> {
defm NAME : VOP1_Real_e32e64_vi <op>;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 925b60561c9d68..c0d38fa52b3446 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -766,16 +766,16 @@ defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "
defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">;
-let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in {
+let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in {
defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32">;
defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32">;
}
-let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1, isAdd = 1 in {
+let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1, isAdd = 1 in {
defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32">;
}
-let isAdd = 1 in {
+let isAdd = 1 in {
defm V_ADD_CO_U32 : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_co_u32">;
defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32">;
}
@@ -2290,10 +2290,10 @@ multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8"
-multiclass VOP2_SDWA_Real <bits<6> op> {
+multiclass VOP2_SDWA8_Real <bits<6> op> {
if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA then
def _sdwa_vi :
- VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOP_SDWA8_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}
@@ -2321,7 +2321,7 @@ multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName
}
if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA then
def _sdwa_vi :
- VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
+ VOP_SDWA8_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
let AsmString = AsmName # ps.AsmOperands;
@@ -2337,7 +2337,7 @@ multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName
} // End AssemblerPredicate = isGFX8Only, DecoderNamespace = "GFX8"
-let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
+let DecoderNamespace = "GFX9" in {
multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
def _e32_gfx9 :
@@ -2386,10 +2386,10 @@ multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}
-} // End AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9"
+} // End DecoderNamespace = "GFX9"
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
- Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {
+ Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA8_Real<op>, VOP2_SDWA9_Real<op> {
if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
def _dpp_vi :
@@ -2401,7 +2401,7 @@ defm V_CNDMASK_B32 : VOP2_Real_e32e64_vi <0x0>;
defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>;
defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>;
defm V_SUBREV_F32 : VOP2_Real_e32e64_vi <0x3>;
-let AssemblerPredicate = isGCN3ExcludingGFX90A in
+let OtherPredicates = [isGCN3ExcludingGFX90A] in
defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_vi <0x4>;
defm V_MUL_F32 : VOP2_Real_e32e64_vi <0x5>;
defm V_MUL_I32_I24 : VOP2_Real_e32e64_vi <0x6>;
@@ -2431,6 +2431,7 @@ defm V_ADDC_U32 : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "
defm V_SUBB_U32 : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">;
defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">;
+let AssemblerPredicate = isGFX9Only in {
defm V_ADD_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_CO_U32", "v_add_co_u32">;
defm V_SUB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_CO_U32", "v_sub_co_u32">;
defm V_SUBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_CO_U32", "v_subrev_co_u32">;
@@ -2441,6 +2442,7 @@ defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_s
defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>;
defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>;
defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>;
+} // End AssemblerPredicate = isGFX9Only
defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>;
defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>;
@@ -2518,7 +2520,7 @@ defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>;
} // End SubtargetPredicate = HasDLInsts
-let AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" in {
+let DecoderNamespace = "GFX90A" in {
multiclass VOP2_Real_e32_gfx90a <bits<6> op> {
def _e32_gfx90a :
VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX90A>,
@@ -2551,7 +2553,7 @@ let SubtargetPredicate = HasFmacF64Inst in {
defm V_FMAC_F64 : VOP2_Real_e32e64_gfx90a <0x4>;
} // End SubtargetPredicate = HasFmacF64Inst
-let SubtargetPredicate = isGFX90APlus, IsSingle = 1 in {
+let IsSingle = 1 in {
defm V_MUL_LEGACY_F32 : VOP2_Real_e64_gfx90a <0x2a1>;
}
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index d6e08dce130ced..f4ccae1decb1df 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -2290,7 +2290,7 @@ multiclass VOPC_Real_vi <bits<10> op> {
if !cast<VOPC_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA then
def _sdwa_vi :
- VOP_SDWA_Real <!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOP_SDWA8_Real <!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>,
VOPC_SDWAe <op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
if !cast<VOPC_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index aab5dc7465d938..24fe24b1a53141 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -650,7 +650,7 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);
let SubtargetPredicate = HasSDWA;
- let AssemblerPredicate = HasSDWA;
+ //let AssemblerPredicate = HasSDWA;
let AsmVariantName = !if(P.HasExtSDWA, AMDGPUAsmVariants.SDWA,
AMDGPUAsmVariants.Disable);
let DecoderNamespace = "GFX8";
@@ -658,7 +658,7 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
VOPProfile Pfl = P;
}
-class VOP_SDWA_Real <VOP_SDWA_Pseudo ps> :
+class VOP_SDWA8_Real <VOP_SDWA_Pseudo ps> :
InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SDWA> {
@@ -676,7 +676,7 @@ class VOP_SDWA_Real <VOP_SDWA_Pseudo ps> :
// Copy relevant pseudo op flags
let SubtargetPredicate = ps.SubtargetPredicate;
- let AssemblerPredicate = ps.AssemblerPredicate;
+ let AssemblerPredicate = HasSDWA8;
let AsmMatchConverter = ps.AsmMatchConverter;
let AsmVariantName = ps.AsmVariantName;
let UseNamedOperandTable = ps.UseNamedOperandTable;
@@ -708,7 +708,7 @@ class Base_VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
- let SubtargetPredicate = HasSDWA9;
+ let SubtargetPredicate = ps.SubtargetPredicate;
let AssemblerPredicate = HasSDWA9;
let OtherPredicates = ps.OtherPredicates;
let AsmVariantName = !if(ps.Pfl.HasExtSDWA9, AMDGPUAsmVariants.SDWA9,
@@ -735,7 +735,7 @@ class VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SDWA9>;
class Base_VOP_SDWA10_Real<VOP_SDWA_Pseudo ps> : Base_VOP_SDWA9_Real<ps> {
- let SubtargetPredicate = HasSDWA10;
+ let SubtargetPredicate = ps.SubtargetPredicate;
let AssemblerPredicate = HasSDWA10;
let DecoderNamespace = "GFX10";
}
@@ -1508,7 +1508,7 @@ class VOP3_DPP16_t16_Helper<bits<10> op, VOP_DPP_Pseudo ps,
let SchedRW = ps.SchedRW;
let Uses = ps.Uses;
let AssemblerPredicate = HasDPP16;
- let SubtargetPredicate = HasDPP16;
+ let SubtargetPredicate = ps.SubtargetPredicate;
let OtherPredicates = ps.OtherPredicates;
}
>From d6fb34c24c2d71a149bb4e7c4c9ada0a343d9313 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 17 Nov 2023 17:49:52 +0900
Subject: [PATCH 2/2] AMDGPU: Add gfx950 subtarget definitions
Mostly a stub, but adds some baseline tests and
tests for removed instructions.
---
clang/docs/ReleaseNotes.rst | 2 +
clang/include/clang/Basic/Cuda.h | 1 +
clang/lib/Basic/Cuda.cpp | 1 +
clang/lib/Basic/Targets/NVPTX.cpp | 1 +
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 1 +
clang/test/CodeGenOpenCL/amdgpu-features.cl | 2 +
clang/test/Driver/amdgpu-macros.cl | 1 +
clang/test/Driver/amdgpu-mcpu.cl | 2 +
.../Misc/target-invalid-cpu-note/amdgcn.c | 1 +
.../test/Misc/target-invalid-cpu-note/nvptx.c | 1 +
llvm/docs/AMDGPUUsage.rst | 9 +-
llvm/include/llvm/BinaryFormat/ELF.h | 2 +-
llvm/include/llvm/TargetParser/TargetParser.h | 25 +-
llvm/lib/Object/ELFObjectFile.cpp | 2 +
llvm/lib/ObjectYAML/ELFYAML.cpp | 1 +
llvm/lib/Target/AMDGPU/AMDGPU.td | 16 +
llvm/lib/Target/AMDGPU/GCNProcessors.td | 4 +
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 1 +
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 2 +
llvm/lib/TargetParser/TargetParser.cpp | 11 +-
llvm/test/CodeGen/AMDGPU/bf16-conversions.ll | 345 ++--
.../CodeGen/AMDGPU/directive-amdgcn-target.ll | 6 +
.../CodeGen/AMDGPU/elf-header-flags-mach.ll | 2 +
.../AMDGPU/elf-header-flags-sramecc.ll | 8 +
llvm/test/CodeGen/AMDGPU/fmaximum3.ll | 594 +++++--
llvm/test/CodeGen/AMDGPU/fminimum3.ll | 594 +++++--
.../CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll | 2 +
llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll | 1224 ++++++-------
llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll | 1113 ++++++------
llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll | 1569 ++++++++---------
llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll | 1223 ++++++-------
llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll | 1113 ++++++------
llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll | 1569 ++++++++---------
llvm/test/MC/AMDGPU/flat-scratch-gfx940.s | 1 +
llvm/test/MC/AMDGPU/gfx940_asm_features.s | 1 +
llvm/test/MC/AMDGPU/gfx950-unsupported.s | 179 ++
.../MC/AMDGPU/gfx950_invalid_encoding.txt | 13 +
.../Disassembler/AMDGPU/gfx940_features.txt | 1 +
.../Object/AMDGPU/elf-header-flags-mach.yaml | 7 +
.../llvm-objdump/ELF/AMDGPU/subtarget.ll | 5 +
.../llvm-readobj/ELF/AMDGPU/elf-headers.test | 9 +
llvm/tools/llvm-readobj/ELFDumper.cpp | 1 +
offload/DeviceRTL/CMakeLists.txt | 2 +-
43 files changed, 5148 insertions(+), 4519 deletions(-)
create mode 100644 llvm/test/MC/AMDGPU/gfx950-unsupported.s
create mode 100644 llvm/test/MC/AMDGPU/gfx950_invalid_encoding.txt
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 681728d36952c1..5caedd0b6d3cfd 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -710,6 +710,8 @@ Target Specific Changes
AMDGPU Support
^^^^^^^^^^^^^^
+* Initial support for gfx950
+
X86 Support
^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index 721e8981af6ffc..c2a4addf488df1 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -107,6 +107,7 @@ enum class OffloadArch {
GFX940,
GFX941,
GFX942,
+ GFX950,
GFX10_1_GENERIC,
GFX1010,
GFX1011,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 59c932468cd891..d56609a2a8f24a 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -125,6 +125,7 @@ static const OffloadArchToStringMap arch_names[] = {
GFX(940), // gfx940
GFX(941), // gfx941
GFX(942), // gfx942
+ GFX(950), // gfx950
{OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
GFX(1010), // gfx1010
GFX(1011), // gfx1011
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 0897032c4b8546..dbc3fec3657610 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -209,6 +209,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case OffloadArch::GFX940:
case OffloadArch::GFX941:
case OffloadArch::GFX942:
+ case OffloadArch::GFX950:
case OffloadArch::GFX10_1_GENERIC:
case OffloadArch::GFX1010:
case OffloadArch::GFX1011:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 43dc0e62284602..b595d3250d6230 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2305,6 +2305,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
case OffloadArch::GFX940:
case OffloadArch::GFX941:
case OffloadArch::GFX942:
+ case OffloadArch::GFX950:
case OffloadArch::GFX10_1_GENERIC:
case OffloadArch::GFX1010:
case OffloadArch::GFX1011:
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index 8b56ec94f2c4ee..5c324032b51956 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -32,6 +32,7 @@
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx940 -emit-llvm -o - %s | FileCheck --check-prefix=GFX940 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx941 -emit-llvm -o - %s | FileCheck --check-prefix=GFX941 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx950 -emit-llvm -o - %s | FileCheck --check-prefix=GFX950 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
@@ -88,6 +89,7 @@
// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX950: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl
index d354f933c5ad78..d97b2ddb1fc663 100644
--- a/clang/test/Driver/amdgpu-macros.cl
+++ b/clang/test/Driver/amdgpu-macros.cl
@@ -110,6 +110,7 @@
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx940 -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx941 -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx942 -DFAMILY=GFX9
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx950 -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1010 -DFAMILY=GFX10
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1011 -DFAMILY=GFX10
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1012 -DFAMILY=GFX10
diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl
index ba578435072985..7c34d3ec6c63a9 100644
--- a/clang/test/Driver/amdgpu-mcpu.cl
+++ b/clang/test/Driver/amdgpu-mcpu.cl
@@ -95,6 +95,7 @@
// RUN: %clang -### -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefix=GFX941 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefix=GFX942 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --check-prefix=GFX950 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX1010 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefix=GFX1011 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefix=GFX1012 %s
@@ -150,6 +151,7 @@
// GFX940: "-target-cpu" "gfx940"
// GFX941: "-target-cpu" "gfx941"
// GFX942: "-target-cpu" "gfx942"
+// GFX950: "-target-cpu" "gfx950"
// GFX1010: "-target-cpu" "gfx1010"
// GFX1011: "-target-cpu" "gfx1011"
// GFX1012: "-target-cpu" "gfx1012"
diff --git a/clang/test/Misc/target-invalid-cpu-note/amdgcn.c b/clang/test/Misc/target-invalid-cpu-note/amdgcn.c
index 4e675871f1e5bd..642d2df211c21a 100644
--- a/clang/test/Misc/target-invalid-cpu-note/amdgcn.c
+++ b/clang/test/Misc/target-invalid-cpu-note/amdgcn.c
@@ -48,6 +48,7 @@
// CHECK-SAME: {{^}}, gfx940
// CHECK-SAME: {{^}}, gfx941
// CHECK-SAME: {{^}}, gfx942
+// CHECK-SAME: {{^}}, gfx950
// CHECK-SAME: {{^}}, gfx1010
// CHECK-SAME: {{^}}, gfx1011
// CHECK-SAME: {{^}}, gfx1012
diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
index 44fe07065b2428..3ea6c02d6b3846 100644
--- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c
+++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
@@ -54,6 +54,7 @@
// CHECK-SAME: {{^}}, gfx940
// CHECK-SAME: {{^}}, gfx941
// CHECK-SAME: {{^}}, gfx942
+// CHECK-SAME: {{^}}, gfx950
// CHECK-SAME: {{^}}, gfx10-1-generic
// CHECK-SAME: {{^}}, gfx1010
// CHECK-SAME: {{^}}, gfx1011
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index c180ca5fcebef3..b85b680b9c82d3 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -399,6 +399,13 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
work-item
IDs
+ ``gfx950`` ``amdgcn`` dGPU - sramecc - Architected *TBA*
+ - tgsplit flat
+ - xnack scratch .. TODO::
+ - kernarg preload - Packed
+ work-item Add product
+ IDs names.
+
**GCN GFX10.1 (RDNA 1)** [AMD-GCN-GFX10-RDNA1]_
-----------------------------------------------------------------------------------------------------------------------
``gfx1010`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5700
@@ -2178,7 +2185,7 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
*reserved* 0x04d Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
- *reserved* 0x04f Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX950`` 0x04f ``gfx950``
*reserved* 0x050 Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC`` 0x051 ``gfx9-generic``
``EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC`` 0x052 ``gfx10-1-generic``
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 6c05ea7208e1f1..fd32a6ec19652b 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -811,7 +811,7 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
- EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4F = 0x04f,
+ EF_AMDGPU_MACH_AMDGCN_GFX950 = 0x04f,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050,
EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h
index c6db4dfd7f5159..55e7b417428c4e 100644
--- a/llvm/include/llvm/TargetParser/TargetParser.h
+++ b/llvm/include/llvm/TargetParser/TargetParser.h
@@ -86,18 +86,19 @@ enum GPUKind : uint32_t {
GK_GFX940 = 68,
GK_GFX941 = 69,
GK_GFX942 = 70,
-
- GK_GFX1010 = 71,
- GK_GFX1011 = 72,
- GK_GFX1012 = 73,
- GK_GFX1013 = 74,
- GK_GFX1030 = 75,
- GK_GFX1031 = 76,
- GK_GFX1032 = 77,
- GK_GFX1033 = 78,
- GK_GFX1034 = 79,
- GK_GFX1035 = 80,
- GK_GFX1036 = 81,
+ GK_GFX950 = 71,
+
+ GK_GFX1010 = 72,
+ GK_GFX1011 = 73,
+ GK_GFX1012 = 74,
+ GK_GFX1013 = 75,
+ GK_GFX1030 = 76,
+ GK_GFX1031 = 77,
+ GK_GFX1032 = 78,
+ GK_GFX1033 = 79,
+ GK_GFX1034 = 80,
+ GK_GFX1035 = 81,
+ GK_GFX1036 = 82,
GK_GFX1100 = 90,
GK_GFX1101 = 91,
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 9dc39936ffd8bb..2ffb2ac5e7e453 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -550,6 +550,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx941";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942:
return "gfx942";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950:
+ return "gfx950";
// AMDGCN GFX10.
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010:
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 130b8798ab4a46..ca0ea03452d3be 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -609,6 +609,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX941, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX942, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX950, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index d7feaef8c4a97d..d028c1f5ca7613 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -360,6 +360,12 @@ def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts",
"Additional instructions for GFX940+"
>;
+def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
+ "GFX950Insts",
+ "true",
+ "Additional instructions for GFX950+"
+>;
+
def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
"GFX10Insts",
"true",
@@ -1470,6 +1476,14 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeatureFlatBufferGlobalAtomicFaddF64Inst
]>;
+def FeatureISAVersion9_5_Common : FeatureSet<
+ !listconcat(FeatureISAVersion9_4_Common.Features,
+ [FeatureFP8Insts,
+ FeatureFP8ConversionInsts,
+ FeatureCvtFP8VOP1Bug,
+ FeatureGFX950Insts
+ ])>;
+
def FeatureISAVersion9_4_0 : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[
@@ -1503,6 +1517,8 @@ def FeatureISAVersion9_4_Generic : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[FeatureRequiresCOV6])>;
+def FeatureISAVersion9_5_0 : FeatureSet<FeatureISAVersion9_5_Common.Features>;
+
def FeatureISAVersion10_Common : FeatureSet<
[FeatureGFX10,
FeatureLDSBankCount32,
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index 067043d290b760..3403cbab526d46 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -204,6 +204,10 @@ def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel,
FeatureISAVersion9_4_2.Features
>;
+def : ProcessorModel<"gfx950", SIDPGFX940FullSpeedModel,
+ FeatureISAVersion9_5_0.Features
+>;
+
// [gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c]
def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel,
FeatureISAVersion9_Generic.Features
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 6ff964077d8fd0..1b06756a8a1016 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -106,6 +106,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool GFX9Insts = false;
bool GFX90AInsts = false;
bool GFX940Insts = false;
+ bool GFX950Insts = false;
bool GFX10Insts = false;
bool GFX11Insts = false;
bool GFX12Insts = false;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 55ba5ebbebb8fd..ffde4d33f1341a 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -96,6 +96,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
@@ -182,6 +183,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941;
case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
+ case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950;
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 7dfb8c021a8a5f..b0385915f3042b 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -107,6 +107,7 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
+ {{"gfx950"}, {"gfx950"}, GK_GFX950, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
{{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
{{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
@@ -262,6 +263,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX940: return {9, 4, 0};
case GK_GFX941: return {9, 4, 1};
case GK_GFX942: return {9, 4, 2};
+ case GK_GFX950: return {9, 5, 0};
case GK_GFX1010: return {10, 1, 0};
case GK_GFX1011: return {10, 1, 1};
case GK_GFX1012: return {10, 1, 2};
@@ -361,7 +363,8 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["wavefrontsize32"] = true;
Features["wavefrontsize64"] = true;
} else if (T.isAMDGCN()) {
- switch (parseArchAMDGCN(GPU)) {
+ AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
+ switch (Kind) {
case GK_GFX1201:
case GK_GFX1200:
case GK_GFX12_GENERIC:
@@ -466,12 +469,16 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["s-memtime-inst"] = true;
Features["gws"] = true;
break;
+ case GK_GFX950:
+ Features["gfx950-insts"] = true;
+ [[fallthrough]];
case GK_GFX942:
case GK_GFX941:
case GK_GFX940:
Features["fp8-insts"] = true;
Features["fp8-conversion-insts"] = true;
- Features["xf32-insts"] = true;
+ if (Kind != GK_GFX950)
+ Features["xf32-insts"] = true;
[[fallthrough]];
case GK_GFX9_4_GENERIC:
Features["gfx940-insts"] = true;
diff --git a/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll b/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll
index 1c9f35dd45feeb..425fc5884cec7f 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -march=amdgcn -mcpu=gfx940 < %s | FileCheck --check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx940 < %s | FileCheck --check-prefixes=GCN,GFX-940 %s
+; RUN: llc -march=amdgcn -mcpu=gfx950 < %s | FileCheck --check-prefixes=GCN,GFX-950 %s
; TODO: Add global-isel when it can support bf16
@@ -198,19 +199,33 @@ entry:
}
define amdgpu_ps void @fptrunc_f32_to_bf16(float %a, ptr %out) {
-; GCN-LABEL: fptrunc_f32_to_bf16:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: v_mov_b32_e32 v3, v2
-; GCN-NEXT: v_mov_b32_e32 v2, v1
-; GCN-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GCN-NEXT: s_movk_i32 s0, 0x7fff
-; GCN-NEXT: v_add3_u32 v1, v1, v0, s0
-; GCN-NEXT: v_or_b32_e32 v4, 0x400000, v0
-; GCN-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
-; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
-; GCN-NEXT: s_endpgm
+; GFX-940-LABEL: fptrunc_f32_to_bf16:
+; GFX-940: ; %bb.0: ; %entry
+; GFX-940-NEXT: v_mov_b32_e32 v3, v2
+; GFX-940-NEXT: v_mov_b32_e32 v2, v1
+; GFX-940-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-940-NEXT: v_add3_u32 v1, v1, v0, s0
+; GFX-940-NEXT: v_or_b32_e32 v4, 0x400000, v0
+; GFX-940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX-940-NEXT: s_nop 1
+; GFX-940-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
+; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
+; GFX-940-NEXT: s_endpgm
+;
+; GFX-950-LABEL: fptrunc_f32_to_bf16:
+; GFX-950: ; %bb.0: ; %entry
+; GFX-950-NEXT: v_mov_b32_e32 v3, v2
+; GFX-950-NEXT: v_mov_b32_e32 v2, v1
+; GFX-950-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-950-NEXT: v_add3_u32 v1, v1, v0, s0
+; GFX-950-NEXT: v_or_b32_e32 v4, 0x400000, v0
+; GFX-950-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX-950-NEXT: s_nop 1
+; GFX-950-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
+; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
+; GFX-950-NEXT: s_endpgm
entry:
%a.cvt = fptrunc float %a to bfloat
store bfloat %a.cvt, ptr %out
@@ -218,20 +233,35 @@ entry:
}
define amdgpu_ps void @fptrunc_f32_to_bf16_abs(float %a, ptr %out) {
-; GCN-LABEL: fptrunc_f32_to_bf16_abs:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: v_mov_b32_e32 v3, v2
-; GCN-NEXT: v_mov_b32_e32 v2, v1
-; GCN-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
-; GCN-NEXT: v_bfe_u32 v4, v1, 16, 1
-; GCN-NEXT: s_movk_i32 s0, 0x7fff
-; GCN-NEXT: v_add3_u32 v4, v4, v1, s0
-; GCN-NEXT: v_or_b32_e32 v1, 0x400000, v1
-; GCN-NEXT: v_cmp_u_f32_e64 vcc, |v0|, |v0|
-; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
-; GCN-NEXT: s_endpgm
+; GFX-940-LABEL: fptrunc_f32_to_bf16_abs:
+; GFX-940: ; %bb.0: ; %entry
+; GFX-940-NEXT: v_mov_b32_e32 v3, v2
+; GFX-940-NEXT: v_mov_b32_e32 v2, v1
+; GFX-940-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
+; GFX-940-NEXT: v_bfe_u32 v4, v1, 16, 1
+; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-940-NEXT: v_add3_u32 v4, v4, v1, s0
+; GFX-940-NEXT: v_or_b32_e32 v1, 0x400000, v1
+; GFX-940-NEXT: v_cmp_u_f32_e64 vcc, |v0|, |v0|
+; GFX-940-NEXT: s_nop 1
+; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
+; GFX-940-NEXT: s_endpgm
+;
+; GFX-950-LABEL: fptrunc_f32_to_bf16_abs:
+; GFX-950: ; %bb.0: ; %entry
+; GFX-950-NEXT: v_mov_b32_e32 v3, v2
+; GFX-950-NEXT: v_mov_b32_e32 v2, v1
+; GFX-950-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
+; GFX-950-NEXT: v_bfe_u32 v4, v1, 16, 1
+; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-950-NEXT: v_add3_u32 v4, v4, v1, s0
+; GFX-950-NEXT: v_or_b32_e32 v1, 0x400000, v1
+; GFX-950-NEXT: v_cmp_u_f32_e64 vcc, |v0|, |v0|
+; GFX-950-NEXT: s_nop 1
+; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
+; GFX-950-NEXT: s_endpgm
entry:
%a.abs = call float @llvm.fabs.f32(float %a)
%a.cvt = fptrunc float %a.abs to bfloat
@@ -240,20 +270,35 @@ entry:
}
define amdgpu_ps void @fptrunc_f32_to_bf16_neg(float %a, ptr %out) {
-; GCN-LABEL: fptrunc_f32_to_bf16_neg:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: v_mov_b32_e32 v3, v2
-; GCN-NEXT: v_mov_b32_e32 v2, v1
-; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
-; GCN-NEXT: v_bfe_u32 v4, v1, 16, 1
-; GCN-NEXT: s_movk_i32 s0, 0x7fff
-; GCN-NEXT: v_add3_u32 v4, v4, v1, s0
-; GCN-NEXT: v_or_b32_e32 v1, 0x400000, v1
-; GCN-NEXT: v_cmp_u_f32_e64 vcc, -v0, -v0
-; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
-; GCN-NEXT: s_endpgm
+; GFX-940-LABEL: fptrunc_f32_to_bf16_neg:
+; GFX-940: ; %bb.0: ; %entry
+; GFX-940-NEXT: v_mov_b32_e32 v3, v2
+; GFX-940-NEXT: v_mov_b32_e32 v2, v1
+; GFX-940-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
+; GFX-940-NEXT: v_bfe_u32 v4, v1, 16, 1
+; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-940-NEXT: v_add3_u32 v4, v4, v1, s0
+; GFX-940-NEXT: v_or_b32_e32 v1, 0x400000, v1
+; GFX-940-NEXT: v_cmp_u_f32_e64 vcc, -v0, -v0
+; GFX-940-NEXT: s_nop 1
+; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
+; GFX-940-NEXT: s_endpgm
+;
+; GFX-950-LABEL: fptrunc_f32_to_bf16_neg:
+; GFX-950: ; %bb.0: ; %entry
+; GFX-950-NEXT: v_mov_b32_e32 v3, v2
+; GFX-950-NEXT: v_mov_b32_e32 v2, v1
+; GFX-950-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
+; GFX-950-NEXT: v_bfe_u32 v4, v1, 16, 1
+; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-950-NEXT: v_add3_u32 v4, v4, v1, s0
+; GFX-950-NEXT: v_or_b32_e32 v1, 0x400000, v1
+; GFX-950-NEXT: v_cmp_u_f32_e64 vcc, -v0, -v0
+; GFX-950-NEXT: s_nop 1
+; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
+; GFX-950-NEXT: s_endpgm
entry:
%a.neg = fneg float %a
%a.cvt = fptrunc float %a.neg to bfloat
@@ -262,29 +307,53 @@ entry:
}
define amdgpu_ps void @fptrunc_f64_to_bf16(double %a, ptr %out) {
-; GCN-LABEL: fptrunc_f64_to_bf16:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]|
-; GCN-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
-; GCN-NEXT: v_and_b32_e32 v7, 1, v6
-; GCN-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
-; GCN-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
-; GCN-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
-; GCN-NEXT: v_add_u32_e32 v4, v6, v4
-; GCN-NEXT: s_or_b64 vcc, s[0:1], vcc
-; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; GCN-NEXT: s_brev_b32 s0, 1
-; GCN-NEXT: v_and_or_b32 v5, v1, s0, v4
-; GCN-NEXT: v_bfe_u32 v4, v4, 16, 1
-; GCN-NEXT: s_movk_i32 s0, 0x7fff
-; GCN-NEXT: v_add3_u32 v4, v4, v5, s0
-; GCN-NEXT: v_or_b32_e32 v5, 0x400000, v5
-; GCN-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
-; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
-; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
-; GCN-NEXT: s_endpgm
+; GFX-940-LABEL: fptrunc_f64_to_bf16:
+; GFX-940: ; %bb.0: ; %entry
+; GFX-940-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]|
+; GFX-940-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
+; GFX-940-NEXT: v_and_b32_e32 v7, 1, v6
+; GFX-940-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
+; GFX-940-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
+; GFX-940-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
+; GFX-940-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
+; GFX-940-NEXT: v_add_u32_e32 v4, v6, v4
+; GFX-940-NEXT: s_or_b64 vcc, s[0:1], vcc
+; GFX-940-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
+; GFX-940-NEXT: s_brev_b32 s0, 1
+; GFX-940-NEXT: v_and_or_b32 v5, v1, s0, v4
+; GFX-940-NEXT: v_bfe_u32 v4, v4, 16, 1
+; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-940-NEXT: v_add3_u32 v4, v4, v5, s0
+; GFX-940-NEXT: v_or_b32_e32 v5, 0x400000, v5
+; GFX-940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX-940-NEXT: s_nop 1
+; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
+; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
+; GFX-940-NEXT: s_endpgm
+;
+; GFX-950-LABEL: fptrunc_f64_to_bf16:
+; GFX-950: ; %bb.0: ; %entry
+; GFX-950-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]|
+; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
+; GFX-950-NEXT: v_and_b32_e32 v7, 1, v6
+; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
+; GFX-950-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
+; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
+; GFX-950-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
+; GFX-950-NEXT: v_add_u32_e32 v4, v6, v4
+; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
+; GFX-950-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
+; GFX-950-NEXT: s_brev_b32 s0, 1
+; GFX-950-NEXT: v_and_or_b32 v5, v1, s0, v4
+; GFX-950-NEXT: v_bfe_u32 v4, v4, 16, 1
+; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-950-NEXT: v_add3_u32 v4, v4, v5, s0
+; GFX-950-NEXT: v_or_b32_e32 v5, 0x400000, v5
+; GFX-950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX-950-NEXT: s_nop 1
+; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
+; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
+; GFX-950-NEXT: s_endpgm
entry:
%a.cvt = fptrunc double %a to bfloat
store bfloat %a.cvt, ptr %out
@@ -292,30 +361,55 @@ entry:
}
define amdgpu_ps void @fptrunc_f64_to_bf16_neg(double %a, ptr %out) {
-; GCN-LABEL: fptrunc_f64_to_bf16_neg:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
-; GCN-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
-; GCN-NEXT: v_and_b32_e32 v8, 1, v7
-; GCN-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
-; GCN-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
-; GCN-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
-; GCN-NEXT: v_add_u32_e32 v4, v7, v4
-; GCN-NEXT: s_or_b64 vcc, s[0:1], vcc
-; GCN-NEXT: s_brev_b32 s4, 1
-; GCN-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
-; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
-; GCN-NEXT: v_and_or_b32 v5, v6, s4, v4
-; GCN-NEXT: v_bfe_u32 v4, v4, 16, 1
-; GCN-NEXT: s_movk_i32 s0, 0x7fff
-; GCN-NEXT: v_add3_u32 v4, v4, v5, s0
-; GCN-NEXT: v_or_b32_e32 v5, 0x400000, v5
-; GCN-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
-; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
-; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
-; GCN-NEXT: s_endpgm
+; GFX-940-LABEL: fptrunc_f64_to_bf16_neg:
+; GFX-940: ; %bb.0: ; %entry
+; GFX-940-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
+; GFX-940-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
+; GFX-940-NEXT: v_and_b32_e32 v8, 1, v7
+; GFX-940-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
+; GFX-940-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
+; GFX-940-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
+; GFX-940-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
+; GFX-940-NEXT: v_add_u32_e32 v4, v7, v4
+; GFX-940-NEXT: s_or_b64 vcc, s[0:1], vcc
+; GFX-940-NEXT: s_brev_b32 s4, 1
+; GFX-940-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
+; GFX-940-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX-940-NEXT: v_and_or_b32 v5, v6, s4, v4
+; GFX-940-NEXT: v_bfe_u32 v4, v4, 16, 1
+; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-940-NEXT: v_add3_u32 v4, v4, v5, s0
+; GFX-940-NEXT: v_or_b32_e32 v5, 0x400000, v5
+; GFX-940-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
+; GFX-940-NEXT: s_nop 1
+; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
+; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
+; GFX-940-NEXT: s_endpgm
+;
+; GFX-950-LABEL: fptrunc_f64_to_bf16_neg:
+; GFX-950: ; %bb.0: ; %entry
+; GFX-950-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
+; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
+; GFX-950-NEXT: v_and_b32_e32 v8, 1, v7
+; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
+; GFX-950-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
+; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
+; GFX-950-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
+; GFX-950-NEXT: v_add_u32_e32 v4, v7, v4
+; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
+; GFX-950-NEXT: s_brev_b32 s4, 1
+; GFX-950-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
+; GFX-950-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX-950-NEXT: v_and_or_b32 v5, v6, s4, v4
+; GFX-950-NEXT: v_bfe_u32 v4, v4, 16, 1
+; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-950-NEXT: v_add3_u32 v4, v4, v5, s0
+; GFX-950-NEXT: v_or_b32_e32 v5, 0x400000, v5
+; GFX-950-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
+; GFX-950-NEXT: s_nop 1
+; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
+; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
+; GFX-950-NEXT: s_endpgm
entry:
%a.neg = fneg double %a
%a.cvt = fptrunc double %a.neg to bfloat
@@ -324,30 +418,55 @@ entry:
}
define amdgpu_ps void @fptrunc_f64_to_bf16_abs(double %a, ptr %out) {
-; GCN-LABEL: fptrunc_f64_to_bf16_abs:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
-; GCN-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
-; GCN-NEXT: v_and_b32_e32 v8, 1, v7
-; GCN-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
-; GCN-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
-; GCN-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
-; GCN-NEXT: v_add_u32_e32 v4, v7, v4
-; GCN-NEXT: s_or_b64 vcc, s[0:1], vcc
-; GCN-NEXT: v_and_b32_e32 v6, 0x7fffffff, v1
-; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
-; GCN-NEXT: s_brev_b32 s0, 1
-; GCN-NEXT: v_and_or_b32 v5, v6, s0, v4
-; GCN-NEXT: v_bfe_u32 v4, v4, 16, 1
-; GCN-NEXT: s_movk_i32 s0, 0x7fff
-; GCN-NEXT: v_add3_u32 v4, v4, v5, s0
-; GCN-NEXT: v_or_b32_e32 v5, 0x400000, v5
-; GCN-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[0:1]|
-; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
-; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
-; GCN-NEXT: s_endpgm
+; GFX-940-LABEL: fptrunc_f64_to_bf16_abs:
+; GFX-940: ; %bb.0: ; %entry
+; GFX-940-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
+; GFX-940-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
+; GFX-940-NEXT: v_and_b32_e32 v8, 1, v7
+; GFX-940-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
+; GFX-940-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
+; GFX-940-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
+; GFX-940-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
+; GFX-940-NEXT: v_add_u32_e32 v4, v7, v4
+; GFX-940-NEXT: s_or_b64 vcc, s[0:1], vcc
+; GFX-940-NEXT: v_and_b32_e32 v6, 0x7fffffff, v1
+; GFX-940-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX-940-NEXT: s_brev_b32 s0, 1
+; GFX-940-NEXT: v_and_or_b32 v5, v6, s0, v4
+; GFX-940-NEXT: v_bfe_u32 v4, v4, 16, 1
+; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-940-NEXT: v_add3_u32 v4, v4, v5, s0
+; GFX-940-NEXT: v_or_b32_e32 v5, 0x400000, v5
+; GFX-940-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[0:1]|
+; GFX-940-NEXT: s_nop 1
+; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
+; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
+; GFX-940-NEXT: s_endpgm
+;
+; GFX-950-LABEL: fptrunc_f64_to_bf16_abs:
+; GFX-950: ; %bb.0: ; %entry
+; GFX-950-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
+; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
+; GFX-950-NEXT: v_and_b32_e32 v8, 1, v7
+; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
+; GFX-950-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
+; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
+; GFX-950-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
+; GFX-950-NEXT: v_add_u32_e32 v4, v7, v4
+; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
+; GFX-950-NEXT: v_and_b32_e32 v6, 0x7fffffff, v1
+; GFX-950-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX-950-NEXT: s_brev_b32 s0, 1
+; GFX-950-NEXT: v_and_or_b32 v5, v6, s0, v4
+; GFX-950-NEXT: v_bfe_u32 v4, v4, 16, 1
+; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-950-NEXT: v_add3_u32 v4, v4, v5, s0
+; GFX-950-NEXT: v_or_b32_e32 v5, 0x400000, v5
+; GFX-950-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[0:1]|
+; GFX-950-NEXT: s_nop 1
+; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
+; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
+; GFX-950-NEXT: s_endpgm
entry:
%a.abs = call double @llvm.fabs.f64(double %a)
%a.cvt = fptrunc double %a.abs to bfloat
diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
index 4eac26e853c2a0..b64968c9336b93 100644
--- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
+++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
@@ -80,6 +80,9 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX942-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX942-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck --check-prefixes=GFX950 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX950-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX950-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX1010 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1010-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1010-XNACK %s
@@ -180,6 +183,9 @@
; GFX942: .amdgcn_target "amdgcn-amd-amdhsa--gfx942"
; GFX942-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack-"
; GFX942-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack+"
+; GFX950: .amdgcn_target "amdgcn-amd-amdhsa--gfx950"
+; GFX950-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx950:xnack-"
+; GFX950-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx950:xnack+"
; GFX1010: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010"
; GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack-"
; GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack+"
diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
index f1f4edb94a6178..99344f16d4cd68 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
@@ -57,6 +57,7 @@
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX940 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx941 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX941 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx942 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX942 %s
+; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX950 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1010 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1011 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1011 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1012 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1012 %s
@@ -139,6 +140,7 @@
; GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
; GFX941: EF_AMDGPU_MACH_AMDGCN_GFX941 (0x4B)
; GFX942: EF_AMDGPU_MACH_AMDGCN_GFX942 (0x4C)
+; GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F)
; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
; GFX1011: EF_AMDGPU_MACH_AMDGCN_GFX1011 (0x34)
; GFX1012: EF_AMDGPU_MACH_AMDGCN_GFX1012 (0x35)
diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll
index 961b89ab28f623..3ad2a9df764be5 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll
@@ -12,6 +12,9 @@
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX940 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX940 %s
+; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s
+; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s
+
; NO-SRAM-ECC-GFX906: Flags [
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
@@ -44,6 +47,11 @@
; SRAM-ECC-GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
; SRAM-ECC-GFX940: ]
+; SRAM-ECC-GFX950: Flags [
+; SRAM-ECC-GFX950: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
+; SRAM-ECC-GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F)
+; SRAM-ECC-GFX950: ]
+
define amdgpu_kernel void @elf_header() {
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index 27282a453075b3..08122cd0d89eab 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
define float @v_fmaximum3_f32(float %a, float %b, float %c) {
; GFX12-LABEL: v_fmaximum3_f32:
@@ -19,9 +20,11 @@ define float @v_fmaximum3_f32(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -46,9 +49,11 @@ define float @v_fmaximum3_f32_commute(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v1, v2, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -71,10 +76,13 @@ define amdgpu_ps i32 @s_fmaximum3_f32(float inreg %a, float inreg %b, float inre
; GFX9-NEXT: v_max_f32_e32 v1, s0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_max_f32_e32 v1, s2, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
%max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -101,9 +109,11 @@ define float @v_fmaximum3_f32_fabs0(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e64 v3, |v0|, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
@@ -129,9 +139,11 @@ define float @v_fmaximum3_f32_fabs1(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e64 v3, v0, |v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%b.fabs = call float @llvm.fabs.f32(float %b)
@@ -157,9 +169,11 @@ define float @v_fmaximum3_f32_fabs2(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v1, v0, |v2|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%c.fabs = call float @llvm.fabs.f32(float %c)
@@ -185,9 +199,11 @@ define float @v_fmaximum3_f32_fabs_all(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e64 v3, |v0|, |v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v1, v0, |v2|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
@@ -215,9 +231,11 @@ define float @v_fmaximum3_f32_fneg_all(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e64 v3, -v0, -v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v1, v0, -v2
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
@@ -245,9 +263,11 @@ define float @v_fmaximum3_f32_fneg_fabs_all(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e64 v3, -|v0|, -|v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v1, v0, -|v2|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
@@ -278,9 +298,11 @@ define float @v_fmaximum3_f32_fneg0(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e64 v3, -v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
@@ -306,9 +328,11 @@ define float @v_fmaximum3_f32_fneg1(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e64 v3, v0, -v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%b.fneg = fneg float %b
@@ -334,9 +358,11 @@ define float @v_fmaximum3_f32_fneg2(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v1, v0, -v2
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%c.fneg = fneg float %c
@@ -362,9 +388,11 @@ define float @v_fmaximum3_f32_const0(float %b, float %c) {
; GFX9-NEXT: v_max_f32_e32 v2, 0x41000000, v0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float 8.0, float %b)
@@ -389,9 +417,11 @@ define float @v_fmaximum3_f32__const2(float %a, float %b) {
; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_max_f32_e32 v1, 0x41000000, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -416,9 +446,11 @@ define float @v_fmaximum3_f32_inlineimm0(float %b, float %c) {
; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float 4.0, float %b)
@@ -443,9 +475,11 @@ define float @v_fmaximum3_f32__inlineimm(float %a, float %b) {
; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_max_f32_e32 v1, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -472,9 +506,11 @@ define float @v_fmaximum3_f32_const1_const2(float %a) {
; GFX9-NEXT: v_max_f32_e32 v1, 0x41000000, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_max_f32_e32 v1, 0x41800000, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float 8.0)
@@ -500,15 +536,19 @@ define <2 x float> @v_fmaximum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float
; GFX9-NEXT: v_max_f32_e32 v6, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v0, v2
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v2, v4, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX9-NEXT: v_max_f32_e32 v2, v5, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
@@ -534,15 +574,19 @@ define <2 x float> @v_fmaximum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2
; GFX9-NEXT: v_max_f32_e32 v6, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v0, v2
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v2, v0, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX9-NEXT: v_max_f32_e32 v2, v1, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
@@ -568,15 +612,19 @@ define <2 x float> @v_fmaximum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b,
; GFX9-NEXT: v_max_f32_e64 v6, |v1|, |v3|
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_max_f32_e64 v3, |v0|, |v2|
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v2, v0, |v4|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX9-NEXT: v_max_f32_e64 v2, v1, |v5|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
@@ -605,15 +653,19 @@ define <2 x float> @v_fmaximum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b,
; GFX9-NEXT: v_max_f32_e64 v6, -v1, -v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_max_f32_e64 v3, -v0, -v2
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v2, v0, -v4
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX9-NEXT: v_max_f32_e64 v2, v1, -v5
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <2 x float> %a
@@ -642,15 +694,19 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c
; GFX9-NEXT: v_max_f32_e32 v4, 2.0, v1
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
; GFX9-NEXT: v_max_f32_e32 v4, 2.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_max_f32_e32 v4, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_max_f32_e32 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> <float 2.0, float 2.0>)
@@ -676,15 +732,19 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b
; GFX9-NEXT: v_max_f32_e32 v4, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v0, v2
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
@@ -711,21 +771,27 @@ define <3 x float> @v_fmaximum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float
; GFX9-NEXT: v_max_f32_e32 v9, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_max_f32_e32 v5, v1, v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_max_f32_e32 v4, v0, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v6, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v7, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v8, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
@@ -752,21 +818,27 @@ define <3 x float> @v_fmaximum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3
; GFX9-NEXT: v_max_f32_e32 v9, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_max_f32_e32 v5, v1, v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_max_f32_e32 v4, v0, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v0, v6
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v6
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v1, v7
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v7
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v2, v8
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v8
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
@@ -793,21 +865,27 @@ define <3 x float> @v_fmaximum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b,
; GFX9-NEXT: v_max_f32_e64 v9, |v2|, |v5|
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5|
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_max_f32_e64 v5, |v1|, |v4|
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4|
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_max_f32_e64 v4, |v0|, |v3|
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX9-NEXT: v_max_f32_e64 v3, v0, |v6|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v3, v1, |v7|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v3, v2, |v8|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a)
@@ -837,21 +915,27 @@ define <3 x float> @v_fmaximum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b,
; GFX9-NEXT: v_max_f32_e64 v9, -v2, -v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_max_f32_e64 v5, -v1, -v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_max_f32_e64 v4, -v0, -v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX9-NEXT: v_max_f32_e64 v3, v0, -v6
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v3, v1, -v7
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v3, v2, -v8
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <3 x float> %a
@@ -881,21 +965,27 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c
; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_max_f32_e32 v6, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v1, v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v2, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> <float 2.0, float 2.0, float 2.0>)
@@ -922,21 +1012,27 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b
; GFX9-NEXT: v_max_f32_e32 v6, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
; GFX9-NEXT: v_max_f32_e32 v5, v1, v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
; GFX9-NEXT: v_max_f32_e32 v4, v0, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc
; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
@@ -962,9 +1058,11 @@ define half @v_fmaximum3_f16(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.maximum.f16(half %a, half %b)
@@ -989,9 +1087,11 @@ define half @v_fmaximum3_f16_commute(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e32 v1, v2, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.maximum.f16(half %a, half %b)
@@ -1016,11 +1116,14 @@ define amdgpu_ps i32 @s_fmaximum3_f16(half inreg %a, half inreg %b, half inreg %
; GFX9-NEXT: v_max_f16_e32 v1, s0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_max_f16_e32 v1, s2, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s2, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
%max0 = call half @llvm.maximum.f16(half %a, half %b)
@@ -1048,9 +1151,11 @@ define half @v_fmaximum3_f16_fabs0(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e64 v3, |v0|, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call half @llvm.fabs.f16(half %a)
@@ -1076,9 +1181,11 @@ define half @v_fmaximum3_f16_fabs1(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e64 v3, v0, |v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%b.fabs = call half @llvm.fabs.f16(half %b)
@@ -1104,9 +1211,11 @@ define half @v_fmaximum3_f16_fabs2(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e64 v1, v0, |v2|
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%c.fabs = call half @llvm.fabs.f16(half %c)
@@ -1132,9 +1241,11 @@ define half @v_fmaximum3_f16_fabs_all(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e64 v3, |v0|, |v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e64 v1, v0, |v2|
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call half @llvm.fabs.f16(half %a)
@@ -1162,9 +1273,11 @@ define half @v_fmaximum3_f16_fneg_all(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e64 v3, -v0, -v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e64 v1, v0, -v2
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg half %a
@@ -1192,9 +1305,11 @@ define half @v_fmaximum3_f16_fneg_fabs_all(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e64 v3, -|v0|, -|v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -|v0|, -|v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e64 v1, v0, -|v2|
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -|v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call half @llvm.fabs.f16(half %a)
@@ -1225,9 +1340,11 @@ define half @v_fmaximum3_f16_fneg0(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e64 v3, -v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg half %a
@@ -1253,9 +1370,11 @@ define half @v_fmaximum3_f16_fneg1(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e64 v3, v0, -v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%b.fneg = fneg half %b
@@ -1281,9 +1400,11 @@ define half @v_fmaximum3_f16_fneg2(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e64 v1, v0, -v2
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%c.fneg = fneg half %c
@@ -1309,9 +1430,11 @@ define half @v_fmaximum3_f16_const0(half %b, half %c) {
; GFX9-NEXT: v_max_f16_e32 v2, 0x4800, v0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.maximum.f16(half 8.0, half %b)
@@ -1336,9 +1459,11 @@ define half @v_fmaximum3_f16__const2(half %a, half %b) {
; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_max_f16_e32 v1, 0x4800, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.maximum.f16(half %a, half %b)
@@ -1363,9 +1488,11 @@ define half @v_fmaximum3_f16_inlineimm0(half %b, half %c) {
; GFX9-NEXT: v_max_f16_e32 v2, 4.0, v0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.maximum.f16(half 4.0, half %b)
@@ -1390,9 +1517,11 @@ define half @v_fmaximum3_f16__inlineimm(half %a, half %b) {
; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_max_f16_e32 v1, 4.0, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.maximum.f16(half %a, half %b)
@@ -1419,9 +1548,11 @@ define half @v_fmaximum3_f16_const1_const2(half %a) {
; GFX9-NEXT: v_max_f16_e32 v1, 0x4800, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_max_f16_e32 v1, 0x4c00, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.maximum.f16(half %a, half 8.0)
@@ -1448,19 +1579,23 @@ define <2 x half> @v_fmaximum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c
; GFX9-NEXT: v_pk_max_f16 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v0, v5, s4
+; GFX9-NEXT: v_perm_b32 v1, v0, v5, s0
; GFX9-NEXT: v_pk_max_f16 v1, v2, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %c, <2 x half> %max0)
@@ -1486,19 +1621,23 @@ define <2 x half> @v_fmaximum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x
; GFX9-NEXT: v_pk_max_f16 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v0, v5, s4
+; GFX9-NEXT: v_perm_b32 v1, v0, v5, s0
; GFX9-NEXT: v_pk_max_f16 v1, v1, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> %c)
@@ -1527,22 +1666,25 @@ define <2 x half> @v_fmaximum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v0
; GFX9-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v1
; GFX9-NEXT: v_pk_max_f16 v3, v3, v4
-; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v2
; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v2
-; GFX9-NEXT: v_perm_b32 v1, v4, v0, s4
+; GFX9-NEXT: v_perm_b32 v1, v4, v0, s0
; GFX9-NEXT: v_pk_max_f16 v1, v1, v5
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v3, v0, s4
+; GFX9-NEXT: v_perm_b32 v0, v3, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a)
%b.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %b)
@@ -1571,19 +1713,23 @@ define <2 x half> @v_fmaximum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2
; GFX9-NEXT: v_pk_max_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v0, v5, s4
+; GFX9-NEXT: v_perm_b32 v1, v0, v5, s0
; GFX9-NEXT: v_pk_max_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v5, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <2 x half> %a
%b.fneg = fneg <2 x half> %b
@@ -1610,21 +1756,25 @@ define <2 x half> @v_fmaximum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_max_f16 v2, v0, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v3, v0, s4
+; GFX9-NEXT: v_perm_b32 v2, v3, v0, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, v1
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v3, v0, s4
+; GFX9-NEXT: v_perm_b32 v0, v3, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 2.0>)
%max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> %c)
@@ -1650,19 +1800,23 @@ define <2 x half> @v_fmaximum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) {
; GFX9-NEXT: v_pk_max_f16 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v0, v4, s4
+; GFX9-NEXT: v_perm_b32 v1, v0, v4, s0
; GFX9-NEXT: v_pk_max_f16 v1, v1, 4.0 op_sel_hi:[1,0]
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v2, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v2, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> <half 4.0, half 4.0>)
@@ -1690,29 +1844,35 @@ define <3 x half> @v_fmaximum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c
; GFX9-NEXT: v_pk_max_f16 v6, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_max_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v6, s0
; GFX9-NEXT: v_pk_max_f16 v1, v5, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v6
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_max_f16 v2, v4, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v8
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b)
%max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %c, <3 x half> %max0)
@@ -1740,29 +1900,35 @@ define <3 x half> @v_fmaximum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x
; GFX9-NEXT: v_pk_max_f16 v6, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_max_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v6, s0
; GFX9-NEXT: v_pk_max_f16 v1, v1, v5
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v5
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, v4
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v8, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b)
%max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> %c)
@@ -1799,33 +1965,37 @@ define <3 x half> @v_fmaximum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3
; GFX9-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v0
; GFX9-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v2
; GFX9-NEXT: v_pk_max_f16 v7, v7, v9
-; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX9-NEXT: v_mov_b32_e32 v12, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_pk_max_f16 v6, v6, v8
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
; GFX9-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v4
+; GFX9-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v5
; GFX9-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v12, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v4
-; GFX9-NEXT: v_perm_b32 v2, v8, v0, s4
+; GFX9-NEXT: v_perm_b32 v2, v8, v0, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, v11
-; GFX9-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v5
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT: v_perm_b32 v6, v9, v1, s4
+; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX9-NEXT: v_perm_b32 v6, v9, v1, s0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc
; GFX9-NEXT: v_pk_max_f16 v6, v6, v10
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v3, v0, s4
+; GFX9-NEXT: v_perm_b32 v0, v3, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %a)
%b.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %b)
@@ -1856,29 +2026,35 @@ define <3 x half> @v_fmaximum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3
; GFX9-NEXT: v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v6, s0
; GFX9-NEXT: v_pk_max_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <3 x half> %a
%b.fneg = fneg <3 x half> %b
@@ -1907,29 +2083,34 @@ define <3 x half> @v_fmaximum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX9-NEXT: v_pk_max_f16 v7, v1, 2.0
+; GFX9-NEXT: s_mov_b32 s1, 0x5040100
+; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v1
+; GFX9-NEXT: s_movk_i32 s0, 0x7e00
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX9-NEXT: s_mov_b32 s5, 0x5040100
-; GFX9-NEXT: v_perm_b32 v4, v5, v0, s5
+; GFX9-NEXT: v_perm_b32 v4, v5, v0, s1
; GFX9-NEXT: v_pk_max_f16 v4, v4, v2
-; GFX9-NEXT: s_movk_i32 s4, 0x7e00
-; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v4
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc
-; GFX9-NEXT: v_pack_b32_f16 v7, v1, s4
+; GFX9-NEXT: v_pack_b32_f16 v7, v1, s0
; GFX9-NEXT: v_pk_max_f16 v7, v7, v3
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX9-NEXT: v_perm_b32 v0, v5, v0, s5
+; GFX9-NEXT: v_perm_b32 v0, v5, v0, s1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> <half 2.0, half 2.0, half 2.0>)
%max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> %c)
@@ -1957,29 +2138,35 @@ define <3 x half> @v_fmaximum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) {
; GFX9-NEXT: v_pk_max_f16 v4, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_pk_max_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v1, v4, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v4, s0
; GFX9-NEXT: v_pk_max_f16 v1, v1, 4.0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v4
-; GFX9-NEXT: v_perm_b32 v2, v0, v6, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX9-NEXT: v_perm_b32 v2, v0, v6, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v6
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b)
%max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> <half 4.0, half 4.0, half 4.0>)
@@ -2007,33 +2194,41 @@ define <4 x half> @v_fmaximum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c
; GFX9-NEXT: v_pk_max_f16 v6, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_max_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v2, v1, v6, s0
; GFX9-NEXT: v_pk_max_f16 v2, v5, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v6
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_max_f16 v2, v4, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v8
+; GFX9-NEXT: v_perm_b32 v1, v1, v3, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v5, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v5, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
%max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %c, <4 x half> %max0)
@@ -2061,33 +2256,41 @@ define <4 x half> @v_fmaximum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x
; GFX9-NEXT: v_pk_max_f16 v6, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_max_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v2, v1, v6, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, v5
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, v4
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v8, v4
+; GFX9-NEXT: v_perm_b32 v1, v1, v3, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v5, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v5, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
%max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> %c)
@@ -2124,37 +2327,43 @@ define <4 x half> @v_fmaximum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4
; GFX9-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v1
; GFX9-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v3
; GFX9-NEXT: v_pk_max_f16 v7, v7, v9
-; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX9-NEXT: v_mov_b32_e32 v12, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_pk_max_f16 v6, v6, v8
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
; GFX9-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v5
+; GFX9-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v4
; GFX9-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v5
-; GFX9-NEXT: v_perm_b32 v2, v8, v1, s4
-; GFX9-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v4
+; GFX9-NEXT: v_perm_b32 v2, v8, v1, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, v11
-; GFX9-NEXT: v_perm_b32 v6, v9, v0, s4
+; GFX9-NEXT: v_perm_b32 v6, v9, v0, s0
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT: v_pk_max_f16 v6, v6, v10
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v7, v12, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v12, v2, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4|
+; GFX9-NEXT: v_perm_b32 v1, v3, v1, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc
-; GFX9-NEXT: v_perm_b32 v0, v7, v0, s4
-; GFX9-NEXT: v_perm_b32 v1, v3, v1, s4
+; GFX9-NEXT: v_perm_b32 v0, v7, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
%b.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
@@ -2185,33 +2394,41 @@ define <4 x half> @v_fmaximum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4
; GFX9-NEXT: v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v2, v1, v6, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4
+; GFX9-NEXT: v_perm_b32 v1, v1, v3, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v5, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v5, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <4 x half> %a
%b.fneg = fneg <4 x half> %b
@@ -2240,35 +2457,41 @@ define <4 x half> @v_fmaximum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_pk_max_f16 v7, v1, 2.0 op_sel_hi:[1,0]
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v7
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v8, v6, v8, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v4, v8, v1, s4
+; GFX9-NEXT: v_perm_b32 v4, v8, v1, s0
; GFX9-NEXT: v_pk_max_f16 v4, v4, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT: v_perm_b32 v8, v5, v0, s4
+; GFX9-NEXT: v_perm_b32 v8, v5, v0, s0
; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v4
; GFX9-NEXT: v_pk_max_f16 v8, v8, v2
; GFX9-NEXT: v_cndmask_b32_e32 v7, v6, v7, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v8
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v9, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: v_perm_b32 v1, v7, v1, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc
-; GFX9-NEXT: v_perm_b32 v0, v5, v0, s4
-; GFX9-NEXT: v_perm_b32 v1, v7, v1, s4
+; GFX9-NEXT: v_perm_b32 v0, v5, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> <half 2.0, half 2.0, half 2.0, half 2.0>)
%max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> %c)
@@ -2296,33 +2519,41 @@ define <4 x half> @v_fmaximum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) {
; GFX9-NEXT: v_pk_max_f16 v4, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_pk_max_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v1, v4, s4
+; GFX9-NEXT: v_perm_b32 v2, v1, v4, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: v_perm_b32 v2, v0, v6, s4
+; GFX9-NEXT: v_perm_b32 v2, v0, v6, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v6
+; GFX9-NEXT: v_perm_b32 v1, v1, v3, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v4, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
%max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> <half 4.0, half 4.0, half 4.0, half 4.0>)
@@ -2346,12 +2577,14 @@ define double @v_fmaximum3_f64(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2377,12 +2610,14 @@ define double @v_fmaximum3_f64_commute(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[4:5], v[0:1]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2404,19 +2639,20 @@ define amdgpu_ps <2 x i32> @s_fmaximum3_f64(double inreg %a, double inreg %b, do
;
; GFX9-LABEL: s_fmaximum3_f64:
; GFX9: ; %bb.0:
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
; GFX9-NEXT: v_max_f64 v[2:3], s[0:1], v[0:1]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[4:5], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc
-; GFX9-NEXT: v_readfirstlane_b32 s0, v1
; GFX9-NEXT: v_readfirstlane_b32 s1, v0
+; GFX9-NEXT: v_readfirstlane_b32 s0, v1
; GFX9-NEXT: ; return to shader part epilog
%max0 = call double @llvm.maximum.f64(double %a, double %b)
%max1 = call double @llvm.maximum.f64(double %max0, double %c)
@@ -2447,12 +2683,14 @@ define double @v_fmaximum3_f64_fabs0(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], |v[0:1]|, v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2479,12 +2717,14 @@ define double @v_fmaximum3_f64_fabs1(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], |v[2:3]|
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2511,12 +2751,14 @@ define double @v_fmaximum3_f64_fabs2(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], |v[4:5]|
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2543,12 +2785,14 @@ define double @v_fmaximum3_f64_fabs_all(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], |v[0:1]|, |v[2:3]|
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], |v[4:5]|
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2577,12 +2821,14 @@ define double @v_fmaximum3_f64_fneg_all(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], -v[0:1], -v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], -v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2611,12 +2857,14 @@ define double @v_fmaximum3_f64_fneg_fabs_all(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], -|v[0:1]|, -|v[2:3]|
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], -|v[4:5]|
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -|v[4:5]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2648,12 +2896,14 @@ define double @v_fmaximum3_f64_fneg0(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], -v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2680,12 +2930,14 @@ define double @v_fmaximum3_f64_fneg1(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], -v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2712,12 +2964,14 @@ define double @v_fmaximum3_f64_fneg2(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], -v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2743,15 +2997,17 @@ define double @v_fmaximum3_f64_const0(double %b, double %c) {
; GFX9-LABEL: v_fmaximum3_f64_const0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0x40200000
-; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], s[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_mov_b32 s0, 0
+; GFX9-NEXT: s_mov_b32 s1, 0x40200000
+; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], s[0:1]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2777,14 +3033,15 @@ define double @v_fmaximum3_f64__const2(double %a, double %b) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0x40200000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_mov_b32 s0, 0
+; GFX9-NEXT: s_mov_b32 s1, 0x40200000
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[4:5]
+; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[0:1]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2810,12 +3067,14 @@ define double @v_fmaximum3_f64_inlineimm0(double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], 4.0
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2841,12 +3100,14 @@ define double @v_fmaximum3_f64__inlineimm(double %a, double %b) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], 4.0
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2871,17 +3132,18 @@ define double @v_fmaximum3_f64_const1_const2(double %a) {
; GFX9-LABEL: v_fmaximum3_f64_const1_const2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0x40200000
-; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_mov_b32 s0, 0
+; GFX9-NEXT: s_mov_b32 s1, 0x40200000
+; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[0:1]
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0x40300000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_mov_b32 s0, 0
+; GFX9-NEXT: s_mov_b32 s1, 0x40300000
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[4:5]
+; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[0:1]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2909,9 +3171,11 @@ define <2 x float> @v_no_fmaximum3_f32__multi_use(float %a, float %b, float %c)
; GFX9-NEXT: v_max_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -2935,11 +3199,14 @@ define amdgpu_ps <2 x i32> @s_no_fmaximum3_f32__multi_use(float inreg %a, float
; GFX9-NEXT: v_max_f32_e32 v1, s0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_max_f32_e32 v1, s2, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_readfirstlane_b32 s1, v1
; GFX9-NEXT: ; return to shader part epilog
%max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -2973,9 +3240,11 @@ define <2 x half> @v_no_fmaximum3_f16__multi_use(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -3002,11 +3271,13 @@ define amdgpu_ps <2 x i32> @s_no_fmaximum3_f16__multi_use(half inreg %a, half in
; GFX9-NEXT: v_max_f16_e32 v1, s0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_max_f16_e32 v1, s2, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s2, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: v_readfirstlane_b32 s1, v1
@@ -3043,19 +3314,23 @@ define <4 x half> @v_no_fmaximum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b,
; GFX9-NEXT: v_pk_max_f16 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v1, v5, s4
+; GFX9-NEXT: v_perm_b32 v0, v1, v5, s0
; GFX9-NEXT: v_pk_max_f16 v3, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-NEXT: v_perm_b32 v1, v1, v5, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v5, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.maximum.f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.maximum.f16(<2 x half> %max0, <2 x half> %c)
@@ -3080,12 +3355,14 @@ define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -3095,3 +3372,6 @@ define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double
%insert.1 = insertelement <2 x double> %insert.0, double %max1, i32 1
ret <2 x double> %insert.1
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX940: {{.*}}
+; GFX950: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
index d9ba2de48bb010..43293512c8c21d 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
define float @v_fminimum3_f32(float %a, float %b, float %c) {
; GFX12-LABEL: v_fminimum3_f32:
@@ -19,9 +20,11 @@ define float @v_fminimum3_f32(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
@@ -46,9 +49,11 @@ define float @v_fminimum3_f32_commute(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v1, v2, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
@@ -71,10 +76,13 @@ define amdgpu_ps i32 @s_fminimum3_f32(float inreg %a, float inreg %b, float inre
; GFX9-NEXT: v_min_f32_e32 v1, s0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_min_f32_e32 v1, s2, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
%max0 = call float @llvm.minimum.f32(float %a, float %b)
@@ -101,9 +109,11 @@ define float @v_fminimum3_f32_fabs0(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e64 v3, |v0|, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
@@ -129,9 +139,11 @@ define float @v_fminimum3_f32_fabs1(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e64 v3, v0, |v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%b.fabs = call float @llvm.fabs.f32(float %b)
@@ -157,9 +169,11 @@ define float @v_fminimum3_f32_fabs2(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v1, v0, |v2|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%c.fabs = call float @llvm.fabs.f32(float %c)
@@ -185,9 +199,11 @@ define float @v_fminimum3_f32_fabs_all(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e64 v3, |v0|, |v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v1, v0, |v2|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
@@ -215,9 +231,11 @@ define float @v_fminimum3_f32_fneg_all(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e64 v3, -v0, -v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v1, v0, -v2
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
@@ -245,9 +263,11 @@ define float @v_fminimum3_f32_fneg_fabs_all(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e64 v3, -|v0|, -|v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v1, v0, -|v2|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
@@ -278,9 +298,11 @@ define float @v_fminimum3_f32_fneg0(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e64 v3, -v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
@@ -306,9 +328,11 @@ define float @v_fminimum3_f32_fneg1(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e64 v3, v0, -v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%b.fneg = fneg float %b
@@ -334,9 +358,11 @@ define float @v_fminimum3_f32_fneg2(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v1, v0, -v2
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%c.fneg = fneg float %c
@@ -362,9 +388,11 @@ define float @v_fminimum3_f32_const0(float %b, float %c) {
; GFX9-NEXT: v_min_f32_e32 v2, 0x41000000, v0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float 8.0, float %b)
@@ -389,9 +417,11 @@ define float @v_fminimum3_f32__const2(float %a, float %b) {
; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_min_f32_e32 v1, 0x41000000, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
@@ -416,9 +446,11 @@ define float @v_fminimum3_f32_inlineimm0(float %b, float %c) {
; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float 4.0, float %b)
@@ -443,9 +475,11 @@ define float @v_fminimum3_f32__inlineimm(float %a, float %b) {
; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_min_f32_e32 v1, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
@@ -472,9 +506,11 @@ define float @v_fminimum3_f32_const1_const2(float %a) {
; GFX9-NEXT: v_min_f32_e32 v1, 0x41000000, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_min_f32_e32 v1, 0x41800000, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float 8.0)
@@ -500,15 +536,19 @@ define <2 x float> @v_fminimum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float
; GFX9-NEXT: v_min_f32_e32 v6, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v0, v2
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v2, v4, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX9-NEXT: v_min_f32_e32 v2, v5, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
@@ -534,15 +574,19 @@ define <2 x float> @v_fminimum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2
; GFX9-NEXT: v_min_f32_e32 v6, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v0, v2
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v2, v0, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX9-NEXT: v_min_f32_e32 v2, v1, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
@@ -568,15 +612,19 @@ define <2 x float> @v_fminimum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b,
; GFX9-NEXT: v_min_f32_e64 v6, |v1|, |v3|
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_min_f32_e64 v3, |v0|, |v2|
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v2, v0, |v4|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX9-NEXT: v_min_f32_e64 v2, v1, |v5|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
@@ -605,15 +653,19 @@ define <2 x float> @v_fminimum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b,
; GFX9-NEXT: v_min_f32_e64 v6, -v1, -v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_min_f32_e64 v3, -v0, -v2
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v2, v0, -v4
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX9-NEXT: v_min_f32_e64 v2, v1, -v5
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <2 x float> %a
@@ -642,15 +694,19 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c
; GFX9-NEXT: v_min_f32_e32 v4, 2.0, v1
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
; GFX9-NEXT: v_min_f32_e32 v4, 2.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_min_f32_e32 v4, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_min_f32_e32 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> <float 2.0, float 2.0>)
@@ -676,15 +732,19 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b
; GFX9-NEXT: v_min_f32_e32 v4, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v0, v2
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
@@ -711,21 +771,27 @@ define <3 x float> @v_fminimum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float
; GFX9-NEXT: v_min_f32_e32 v9, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_min_f32_e32 v5, v1, v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_min_f32_e32 v4, v0, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v6, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v7, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v8, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
@@ -752,21 +818,27 @@ define <3 x float> @v_fminimum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3
; GFX9-NEXT: v_min_f32_e32 v9, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_min_f32_e32 v5, v1, v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_min_f32_e32 v4, v0, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v0, v6
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v6
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v1, v7
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v7
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v2, v8
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v8
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
@@ -793,21 +865,27 @@ define <3 x float> @v_fminimum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b,
; GFX9-NEXT: v_min_f32_e64 v9, |v2|, |v5|
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5|
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_min_f32_e64 v5, |v1|, |v4|
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4|
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_min_f32_e64 v4, |v0|, |v3|
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX9-NEXT: v_min_f32_e64 v3, v0, |v6|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v3, v1, |v7|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v3, v2, |v8|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a)
@@ -837,21 +915,27 @@ define <3 x float> @v_fminimum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b,
; GFX9-NEXT: v_min_f32_e64 v9, -v2, -v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_min_f32_e64 v5, -v1, -v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_min_f32_e64 v4, -v0, -v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX9-NEXT: v_min_f32_e64 v3, v0, -v6
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v3, v1, -v7
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v3, v2, -v8
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <3 x float> %a
@@ -881,21 +965,27 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c
; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_min_f32_e32 v6, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v1, v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v2, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> <float 2.0, float 2.0, float 2.0>)
@@ -922,21 +1012,27 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b
; GFX9-NEXT: v_min_f32_e32 v6, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
; GFX9-NEXT: v_min_f32_e32 v5, v1, v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
; GFX9-NEXT: v_min_f32_e32 v4, v0, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc
; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
@@ -962,9 +1058,11 @@ define half @v_fminimum3_f16(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half %a, half %b)
@@ -989,9 +1087,11 @@ define half @v_fminimum3_f16_commute(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e32 v1, v2, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half %a, half %b)
@@ -1016,11 +1116,14 @@ define amdgpu_ps i32 @s_fminimum3_f16(half inreg %a, half inreg %b, half inreg %
; GFX9-NEXT: v_min_f16_e32 v1, s0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_min_f16_e32 v1, s2, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s2, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
%max0 = call half @llvm.minimum.f16(half %a, half %b)
@@ -1048,9 +1151,11 @@ define half @v_fminimum3_f16_fabs0(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e64 v3, |v0|, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call half @llvm.fabs.f16(half %a)
@@ -1076,9 +1181,11 @@ define half @v_fminimum3_f16_fabs1(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e64 v3, v0, |v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%b.fabs = call half @llvm.fabs.f16(half %b)
@@ -1104,9 +1211,11 @@ define half @v_fminimum3_f16_fabs2(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e64 v1, v0, |v2|
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%c.fabs = call half @llvm.fabs.f16(half %c)
@@ -1132,9 +1241,11 @@ define half @v_fminimum3_f16_fabs_all(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e64 v3, |v0|, |v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e64 v1, v0, |v2|
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call half @llvm.fabs.f16(half %a)
@@ -1162,9 +1273,11 @@ define half @v_fminimum3_f16_fneg_all(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e64 v3, -v0, -v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e64 v1, v0, -v2
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg half %a
@@ -1192,9 +1305,11 @@ define half @v_fminimum3_f16_fneg_fabs_all(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e64 v3, -|v0|, -|v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -|v0|, -|v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e64 v1, v0, -|v2|
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -|v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call half @llvm.fabs.f16(half %a)
@@ -1225,9 +1340,11 @@ define half @v_fminimum3_f16_fneg0(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e64 v3, -v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg half %a
@@ -1253,9 +1370,11 @@ define half @v_fminimum3_f16_fneg1(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e64 v3, v0, -v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%b.fneg = fneg half %b
@@ -1281,9 +1400,11 @@ define half @v_fminimum3_f16_fneg2(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e64 v1, v0, -v2
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%c.fneg = fneg half %c
@@ -1309,9 +1430,11 @@ define half @v_fminimum3_f16_const0(half %b, half %c) {
; GFX9-NEXT: v_min_f16_e32 v2, 0x4800, v0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_min_f16_e32 v2, v0, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half 8.0, half %b)
@@ -1336,9 +1459,11 @@ define half @v_fminimum3_f16__const2(half %a, half %b) {
; GFX9-NEXT: v_min_f16_e32 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_min_f16_e32 v1, 0x4800, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half %a, half %b)
@@ -1363,9 +1488,11 @@ define half @v_fminimum3_f16_inlineimm0(half %b, half %c) {
; GFX9-NEXT: v_min_f16_e32 v2, 4.0, v0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_min_f16_e32 v2, v0, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half 4.0, half %b)
@@ -1390,9 +1517,11 @@ define half @v_fminimum3_f16__inlineimm(half %a, half %b) {
; GFX9-NEXT: v_min_f16_e32 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_min_f16_e32 v1, 4.0, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half %a, half %b)
@@ -1419,9 +1548,11 @@ define half @v_fminimum3_f16_const1_const2(half %a) {
; GFX9-NEXT: v_min_f16_e32 v1, 0x4800, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_min_f16_e32 v1, 0x4c00, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half %a, half 8.0)
@@ -1448,19 +1579,23 @@ define <2 x half> @v_fminimum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c
; GFX9-NEXT: v_pk_min_f16 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v0, v5, s4
+; GFX9-NEXT: v_perm_b32 v1, v0, v5, s0
; GFX9-NEXT: v_pk_min_f16 v1, v2, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %c, <2 x half> %max0)
@@ -1486,19 +1621,23 @@ define <2 x half> @v_fminimum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x
; GFX9-NEXT: v_pk_min_f16 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v0, v5, s4
+; GFX9-NEXT: v_perm_b32 v1, v0, v5, s0
; GFX9-NEXT: v_pk_min_f16 v1, v1, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %max0, <2 x half> %c)
@@ -1527,22 +1666,25 @@ define <2 x half> @v_fminimum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v0
; GFX9-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v1
; GFX9-NEXT: v_pk_min_f16 v3, v3, v4
-; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v2
; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v2
-; GFX9-NEXT: v_perm_b32 v1, v4, v0, s4
+; GFX9-NEXT: v_perm_b32 v1, v4, v0, s0
; GFX9-NEXT: v_pk_min_f16 v1, v1, v5
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v3, v0, s4
+; GFX9-NEXT: v_perm_b32 v0, v3, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a)
%b.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %b)
@@ -1571,19 +1713,23 @@ define <2 x half> @v_fminimum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2
; GFX9-NEXT: v_pk_min_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v0, v5, s4
+; GFX9-NEXT: v_perm_b32 v1, v0, v5, s0
; GFX9-NEXT: v_pk_min_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v5, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <2 x half> %a
%b.fneg = fneg <2 x half> %b
@@ -1610,21 +1756,25 @@ define <2 x half> @v_fminimum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_min_f16 v2, v0, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v3, v0, s4
+; GFX9-NEXT: v_perm_b32 v2, v3, v0, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, v1
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v3, v0, s4
+; GFX9-NEXT: v_perm_b32 v0, v3, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 2.0>)
%max1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %max0, <2 x half> %c)
@@ -1650,19 +1800,23 @@ define <2 x half> @v_fminimum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) {
; GFX9-NEXT: v_pk_min_f16 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v0, v4, s4
+; GFX9-NEXT: v_perm_b32 v1, v0, v4, s0
; GFX9-NEXT: v_pk_min_f16 v1, v1, 4.0 op_sel_hi:[1,0]
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v2, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v2, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %max0, <2 x half> <half 4.0, half 4.0>)
@@ -1690,29 +1844,35 @@ define <3 x half> @v_fminimum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c
; GFX9-NEXT: v_pk_min_f16 v6, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_min_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v6, s0
; GFX9-NEXT: v_pk_min_f16 v1, v5, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v6
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_min_f16 v2, v4, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v8
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b)
%max1 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %c, <3 x half> %max0)
@@ -1740,29 +1900,35 @@ define <3 x half> @v_fminimum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x
; GFX9-NEXT: v_pk_min_f16 v6, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_min_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v6, s0
; GFX9-NEXT: v_pk_min_f16 v1, v1, v5
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v5
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, v4
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v8, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b)
%max1 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %max0, <3 x half> %c)
@@ -1799,33 +1965,37 @@ define <3 x half> @v_fminimum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3
; GFX9-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v0
; GFX9-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v2
; GFX9-NEXT: v_pk_min_f16 v7, v7, v9
-; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX9-NEXT: v_mov_b32_e32 v12, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_pk_min_f16 v6, v6, v8
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
; GFX9-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v4
+; GFX9-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v5
; GFX9-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v12, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v4
-; GFX9-NEXT: v_perm_b32 v2, v8, v0, s4
+; GFX9-NEXT: v_perm_b32 v2, v8, v0, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, v11
-; GFX9-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v5
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT: v_perm_b32 v6, v9, v1, s4
+; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX9-NEXT: v_perm_b32 v6, v9, v1, s0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc
; GFX9-NEXT: v_pk_min_f16 v6, v6, v10
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v3, v0, s4
+; GFX9-NEXT: v_perm_b32 v0, v3, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %a)
%b.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %b)
@@ -1856,29 +2026,35 @@ define <3 x half> @v_fminimum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3
; GFX9-NEXT: v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_min_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v6, s0
; GFX9-NEXT: v_pk_min_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <3 x half> %a
%b.fneg = fneg <3 x half> %b
@@ -1907,29 +2083,34 @@ define <3 x half> @v_fminimum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX9-NEXT: v_pk_min_f16 v7, v1, 2.0
+; GFX9-NEXT: s_mov_b32 s1, 0x5040100
+; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v1
+; GFX9-NEXT: s_movk_i32 s0, 0x7e00
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX9-NEXT: s_mov_b32 s5, 0x5040100
-; GFX9-NEXT: v_perm_b32 v4, v5, v0, s5
+; GFX9-NEXT: v_perm_b32 v4, v5, v0, s1
; GFX9-NEXT: v_pk_min_f16 v4, v4, v2
-; GFX9-NEXT: s_movk_i32 s4, 0x7e00
-; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v4
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc
-; GFX9-NEXT: v_pack_b32_f16 v7, v1, s4
+; GFX9-NEXT: v_pack_b32_f16 v7, v1, s0
; GFX9-NEXT: v_pk_min_f16 v7, v7, v3
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX9-NEXT: v_perm_b32 v0, v5, v0, s5
+; GFX9-NEXT: v_perm_b32 v0, v5, v0, s1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> <half 2.0, half 2.0, half 2.0>)
%max1 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %max0, <3 x half> %c)
@@ -1957,29 +2138,35 @@ define <3 x half> @v_fminimum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) {
; GFX9-NEXT: v_pk_min_f16 v4, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_pk_min_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v1, v4, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v4, s0
; GFX9-NEXT: v_pk_min_f16 v1, v1, 4.0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v4
-; GFX9-NEXT: v_perm_b32 v2, v0, v6, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX9-NEXT: v_perm_b32 v2, v0, v6, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v6
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b)
%max1 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %max0, <3 x half> <half 4.0, half 4.0, half 4.0>)
@@ -2007,33 +2194,41 @@ define <4 x half> @v_fminimum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c
; GFX9-NEXT: v_pk_min_f16 v6, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_min_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v2, v1, v6, s0
; GFX9-NEXT: v_pk_min_f16 v2, v5, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v6
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_min_f16 v2, v4, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v8
+; GFX9-NEXT: v_perm_b32 v1, v1, v3, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v5, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v5, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b)
%max1 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %c, <4 x half> %max0)
@@ -2061,33 +2256,41 @@ define <4 x half> @v_fminimum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x
; GFX9-NEXT: v_pk_min_f16 v6, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_min_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v2, v1, v6, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, v5
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, v4
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v8, v4
+; GFX9-NEXT: v_perm_b32 v1, v1, v3, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v5, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v5, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b)
%max1 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %max0, <4 x half> %c)
@@ -2124,37 +2327,43 @@ define <4 x half> @v_fminimum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4
; GFX9-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v1
; GFX9-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v3
; GFX9-NEXT: v_pk_min_f16 v7, v7, v9
-; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX9-NEXT: v_mov_b32_e32 v12, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_pk_min_f16 v6, v6, v8
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
; GFX9-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v5
+; GFX9-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v4
; GFX9-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v5
-; GFX9-NEXT: v_perm_b32 v2, v8, v1, s4
-; GFX9-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v4
+; GFX9-NEXT: v_perm_b32 v2, v8, v1, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, v11
-; GFX9-NEXT: v_perm_b32 v6, v9, v0, s4
+; GFX9-NEXT: v_perm_b32 v6, v9, v0, s0
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT: v_pk_min_f16 v6, v6, v10
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v7, v12, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v12, v2, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4|
+; GFX9-NEXT: v_perm_b32 v1, v3, v1, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc
-; GFX9-NEXT: v_perm_b32 v0, v7, v0, s4
-; GFX9-NEXT: v_perm_b32 v1, v3, v1, s4
+; GFX9-NEXT: v_perm_b32 v0, v7, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
%b.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
@@ -2185,33 +2394,41 @@ define <4 x half> @v_fminimum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4
; GFX9-NEXT: v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_min_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v2, v1, v6, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4
+; GFX9-NEXT: v_perm_b32 v1, v1, v3, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v5, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v5, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <4 x half> %a
%b.fneg = fneg <4 x half> %b
@@ -2240,35 +2457,41 @@ define <4 x half> @v_fminimum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_pk_min_f16 v7, v1, 2.0 op_sel_hi:[1,0]
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v7
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v8, v6, v8, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v4, v8, v1, s4
+; GFX9-NEXT: v_perm_b32 v4, v8, v1, s0
; GFX9-NEXT: v_pk_min_f16 v4, v4, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT: v_perm_b32 v8, v5, v0, s4
+; GFX9-NEXT: v_perm_b32 v8, v5, v0, s0
; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v4
; GFX9-NEXT: v_pk_min_f16 v8, v8, v2
; GFX9-NEXT: v_cndmask_b32_e32 v7, v6, v7, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v8
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v9, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: v_perm_b32 v1, v7, v1, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc
-; GFX9-NEXT: v_perm_b32 v0, v5, v0, s4
-; GFX9-NEXT: v_perm_b32 v1, v7, v1, s4
+; GFX9-NEXT: v_perm_b32 v0, v5, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> <half 2.0, half 2.0, half 2.0, half 2.0>)
%max1 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %max0, <4 x half> %c)
@@ -2296,33 +2519,41 @@ define <4 x half> @v_fminimum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) {
; GFX9-NEXT: v_pk_min_f16 v4, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_pk_min_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v1, v4, s4
+; GFX9-NEXT: v_perm_b32 v2, v1, v4, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: v_perm_b32 v2, v0, v6, s4
+; GFX9-NEXT: v_perm_b32 v2, v0, v6, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v6
+; GFX9-NEXT: v_perm_b32 v1, v1, v3, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v4, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b)
%max1 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %max0, <4 x half> <half 4.0, half 4.0, half 4.0, half 4.0>)
@@ -2346,12 +2577,14 @@ define double @v_fminimum3_f64(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2377,12 +2610,14 @@ define double @v_fminimum3_f64_commute(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[4:5], v[0:1]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2404,19 +2639,20 @@ define amdgpu_ps <2 x i32> @s_fminimum3_f64(double inreg %a, double inreg %b, do
;
; GFX9-LABEL: s_fminimum3_f64:
; GFX9: ; %bb.0:
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
; GFX9-NEXT: v_min_f64 v[2:3], s[0:1], v[0:1]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[4:5], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc
-; GFX9-NEXT: v_readfirstlane_b32 s0, v1
; GFX9-NEXT: v_readfirstlane_b32 s1, v0
+; GFX9-NEXT: v_readfirstlane_b32 s0, v1
; GFX9-NEXT: ; return to shader part epilog
%max0 = call double @llvm.minimum.f64(double %a, double %b)
%max1 = call double @llvm.minimum.f64(double %max0, double %c)
@@ -2447,12 +2683,14 @@ define double @v_fminimum3_f64_fabs0(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], |v[0:1]|, v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2479,12 +2717,14 @@ define double @v_fminimum3_f64_fabs1(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], |v[2:3]|
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2511,12 +2751,14 @@ define double @v_fminimum3_f64_fabs2(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], |v[4:5]|
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2543,12 +2785,14 @@ define double @v_fminimum3_f64_fabs_all(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], |v[0:1]|, |v[2:3]|
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], |v[4:5]|
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2577,12 +2821,14 @@ define double @v_fminimum3_f64_fneg_all(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], -v[0:1], -v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], -v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2611,12 +2857,14 @@ define double @v_fminimum3_f64_fneg_fabs_all(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], -|v[0:1]|, -|v[2:3]|
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], -|v[4:5]|
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -|v[4:5]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2648,12 +2896,14 @@ define double @v_fminimum3_f64_fneg0(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], -v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2680,12 +2930,14 @@ define double @v_fminimum3_f64_fneg1(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], -v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2712,12 +2964,14 @@ define double @v_fminimum3_f64_fneg2(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], -v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2743,15 +2997,17 @@ define double @v_fminimum3_f64_const0(double %b, double %c) {
; GFX9-LABEL: v_fminimum3_f64_const0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0x40200000
-; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], s[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_mov_b32 s0, 0
+; GFX9-NEXT: s_mov_b32 s1, 0x40200000
+; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], s[0:1]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2777,14 +3033,15 @@ define double @v_fminimum3_f64__const2(double %a, double %b) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0x40200000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_mov_b32 s0, 0
+; GFX9-NEXT: s_mov_b32 s1, 0x40200000
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[4:5]
+; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[0:1]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2810,12 +3067,14 @@ define double @v_fminimum3_f64_inlineimm0(double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], 4.0
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2841,12 +3100,14 @@ define double @v_fminimum3_f64__inlineimm(double %a, double %b) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], 4.0
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2871,17 +3132,18 @@ define double @v_fminimum3_f64_const1_const2(double %a) {
; GFX9-LABEL: v_fminimum3_f64_const1_const2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0x40200000
-; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_mov_b32 s0, 0
+; GFX9-NEXT: s_mov_b32 s1, 0x40200000
+; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[0:1]
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0x40300000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_mov_b32 s0, 0
+; GFX9-NEXT: s_mov_b32 s1, 0x40300000
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[4:5]
+; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[0:1]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2909,9 +3171,11 @@ define <2 x float> @v_no_fminimum3_f32__multi_use(float %a, float %b, float %c)
; GFX9-NEXT: v_min_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
@@ -2935,11 +3199,14 @@ define amdgpu_ps <2 x i32> @s_no_fminimum3_f32__multi_use(float inreg %a, float
; GFX9-NEXT: v_min_f32_e32 v1, s0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_min_f32_e32 v1, s2, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_readfirstlane_b32 s1, v1
; GFX9-NEXT: ; return to shader part epilog
%max0 = call float @llvm.minimum.f32(float %a, float %b)
@@ -2973,9 +3240,11 @@ define <2 x half> @v_no_fminimum3_f16__multi_use(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -3002,11 +3271,13 @@ define amdgpu_ps <2 x i32> @s_no_fminimum3_f16__multi_use(half inreg %a, half in
; GFX9-NEXT: v_min_f16_e32 v1, s0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_min_f16_e32 v1, s2, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s2, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: v_readfirstlane_b32 s1, v1
@@ -3043,19 +3314,23 @@ define <4 x half> @v_no_fminimum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b,
; GFX9-NEXT: v_pk_min_f16 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v1, v5, s4
+; GFX9-NEXT: v_perm_b32 v0, v1, v5, s0
; GFX9-NEXT: v_pk_min_f16 v3, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-NEXT: v_perm_b32 v1, v1, v5, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v5, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.minimum.f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.minimum.f16(<2 x half> %max0, <2 x half> %c)
@@ -3080,12 +3355,14 @@ define <2 x double> @v_no_fminimum3_f64__multi_use(double %a, double %b, double
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -3095,3 +3372,6 @@ define <2 x double> @v_no_fminimum3_f64__multi_use(double %a, double %b, double
%insert.1 = insertelement <2 x double> %insert.0, double %max1, i32 1
ret <2 x double> %insert.1
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX940: {{.*}}
+; GFX950: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
index 8313f5b655efba..bd35ee3f009736 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
index d90c4a75ac5dea..e782f53cee6087 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
@@ -30,24 +30,24 @@ define half @v_maximum_f16(half %src0, half %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f16_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f16:
; GFX10: ; %bb.0:
@@ -102,12 +102,6 @@ define half @v_maximum_f16__nnan(half %src0, half %src1) {
; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_f16__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -156,24 +150,24 @@ define half @v_maximum_f16__nsz(half %src0, half %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f16__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f16__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f16_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f16__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f16__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f16__nsz:
; GFX10: ; %bb.0:
@@ -228,12 +222,6 @@ define half @v_maximum_f16__nnan_nsz(half %src0, half %src1) {
; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_f16__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_f16__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -284,26 +272,26 @@ define half @v_maximum_f16__nnan_src0(half %arg0, half %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f16__nnan_src0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f16_e32 v0, 1.0, v0
-; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f16__nnan_src0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f16_e32 v0, 1.0, v0
-; GFX940-NEXT: v_max_f16_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f16__nnan_src0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f16_e32 v0, 1.0, v0
+; GFX900-NEXT: v_max_f16_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f16__nnan_src0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f16_e32 v0, 1.0, v0
+; GFX950-NEXT: v_max_f16_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f16__nnan_src0:
; GFX10: ; %bb.0:
@@ -365,26 +353,26 @@ define half @v_maximum_f16__nnan_src1(half %src0, half %arg1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f16__nnan_src1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f16_e32 v1, 1.0, v1
-; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f16__nnan_src1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f16_e32 v1, 1.0, v1
-; GFX940-NEXT: v_max_f16_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f16__nnan_src1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f16_e32 v1, 1.0, v1
+; GFX900-NEXT: v_max_f16_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f16__nnan_src1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f16_e32 v1, 1.0, v1
+; GFX950-NEXT: v_max_f16_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f16__nnan_src1:
; GFX10: ; %bb.0:
@@ -453,34 +441,34 @@ define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_maximum_f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s17
-; GFX9-NEXT: v_max_f16_e32 v1, s16, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v0
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_maximum_f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s1
-; GFX940-NEXT: v_max_f16_e32 v1, s0, v0
-; GFX940-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v0
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_maximum_f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s17
+; GFX900-NEXT: v_max_f16_e32 v1, s16, v0
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v0
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_maximum_f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v0, s1
+; GFX950-NEXT: v_max_f16_e32 v1, s0, v0
+; GFX950-NEXT: v_mov_b32_e32 v2, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX950-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v0
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_maximum_f16:
; GFX10: ; %bb.0:
@@ -567,35 +555,35 @@ define <2 x half> @v_maximum_v2f16(<2 x half> %src0, <2 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v2f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v2f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f16:
; GFX10: ; %bb.0:
@@ -668,12 +656,6 @@ define <2 x half> @v_maximum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v2f16__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v2f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -736,35 +718,35 @@ define <2 x half> @v_maximum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v2f16__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v2f16__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f16__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f16__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f16__nsz:
; GFX10: ; %bb.0:
@@ -837,12 +819,6 @@ define <2 x half> @v_maximum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1)
; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v2f16__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v2f16__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -917,50 +893,50 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_maximum_v2f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s17
-; GFX9-NEXT: v_mov_b32_e32 v1, s17
-; GFX9-NEXT: s_lshr_b32 s4, s17, 16
-; GFX9-NEXT: v_pk_max_f16 v1, s16, v1
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
-; GFX9-NEXT: s_lshr_b32 s5, s16, 16
-; GFX9-NEXT: v_mov_b32_e32 v3, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s5, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v0
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_maximum_v2f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s1
-; GFX940-NEXT: v_mov_b32_e32 v1, s1
-; GFX940-NEXT: s_lshr_b32 s1, s1, 16
-; GFX940-NEXT: v_pk_max_f16 v1, s0, v1
-; GFX940-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
-; GFX940-NEXT: s_lshr_b32 s0, s0, 16
-; GFX940-NEXT: v_mov_b32_e32 v3, s1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, s0, v3
-; GFX940-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v0
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_maximum_v2f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s17
+; GFX900-NEXT: v_mov_b32_e32 v1, s17
+; GFX900-NEXT: s_lshr_b32 s4, s17, 16
+; GFX900-NEXT: v_pk_max_f16 v1, s16, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
+; GFX900-NEXT: s_lshr_b32 s5, s16, 16
+; GFX900-NEXT: v_mov_b32_e32 v3, s4
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s5, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v0
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_maximum_v2f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v0, s1
+; GFX950-NEXT: v_mov_b32_e32 v1, s1
+; GFX950-NEXT: s_lshr_b32 s1, s1, 16
+; GFX950-NEXT: v_pk_max_f16 v1, s0, v1
+; GFX950-NEXT: v_mov_b32_e32 v2, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
+; GFX950-NEXT: s_lshr_b32 s0, s0, 16
+; GFX950-NEXT: v_mov_b32_e32 v3, s1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, s0, v3
+; GFX950-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX950-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v0
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_maximum_v2f16:
; GFX10: ; %bb.0:
@@ -1065,41 +1041,41 @@ define <3 x half> @v_maximum_v3f16(<3 x half> %src0, <3 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v3f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v4, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_pk_max_f16 v3, v0, v2
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v3f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v4, v1, v3
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT: v_pk_max_f16 v3, v0, v2
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v4, v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX900-NEXT: v_pk_max_f16 v3, v0, v2
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v4, v1, v3
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX950-NEXT: v_pk_max_f16 v3, v0, v2
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f16:
; GFX10: ; %bb.0:
@@ -1187,13 +1163,6 @@ define <3 x half> @v_maximum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v3f16__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX940-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v3f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1269,41 +1238,41 @@ define <3 x half> @v_maximum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v3f16__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v4, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_pk_max_f16 v3, v0, v2
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v3f16__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v4, v1, v3
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT: v_pk_max_f16 v3, v0, v2
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f16__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v4, v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX900-NEXT: v_pk_max_f16 v3, v0, v2
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f16__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v4, v1, v3
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX950-NEXT: v_pk_max_f16 v3, v0, v2
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f16__nsz:
; GFX10: ; %bb.0:
@@ -1391,13 +1360,6 @@ define <3 x half> @v_maximum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1)
; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v3f16__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX940-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v3f16__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1487,51 +1449,51 @@ define <4 x half> @v_maximum_v4f16(<4 x half> %src0, <4 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v4f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v4, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_pk_max_f16 v3, v0, v2
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v4f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v4, v1, v3
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v3, v0, v2
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v4, v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX900-NEXT: v_pk_max_f16 v3, v0, v2
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v4, v1, v3
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v3, v0, v2
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX950-NEXT: v_perm_b32 v1, v1, v6, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f16:
; GFX10: ; %bb.0:
@@ -1635,13 +1597,6 @@ define <4 x half> @v_maximum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) {
; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v4f16__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX940-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v4f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1731,51 +1686,51 @@ define <4 x half> @v_maximum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v4f16__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v4, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_pk_max_f16 v3, v0, v2
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v4f16__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v4, v1, v3
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v3, v0, v2
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f16__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v4, v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX900-NEXT: v_pk_max_f16 v3, v0, v2
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f16__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v4, v1, v3
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v3, v0, v2
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX950-NEXT: v_perm_b32 v1, v1, v6, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f16__nsz:
; GFX10: ; %bb.0:
@@ -1879,13 +1834,6 @@ define <4 x half> @v_maximum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1)
; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v4f16__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX940-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v4f16__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2023,83 +1971,83 @@ define <8 x half> @v_maximum_v8f16(<8 x half> %src0, <8 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v8f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v8, v3, v7
-; GFX9-NEXT: v_mov_b32_e32 v9, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v8
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
-; GFX9-NEXT: v_pk_max_f16 v7, v2, v6
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v7
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
-; GFX9-NEXT: v_pk_max_f16 v6, v1, v5
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
-; GFX9-NEXT: v_pk_max_f16 v5, v0, v4
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v5
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v6, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v7, s4
-; GFX9-NEXT: v_perm_b32 v2, v2, v8, s4
-; GFX9-NEXT: v_perm_b32 v3, v3, v10, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v8f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v8, v3, v7
-; GFX940-NEXT: v_mov_b32_e32 v9, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v8, 16, v8
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v7, v2, v6
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
-; GFX940-NEXT: v_perm_b32 v3, v3, v10, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v7, 16, v7
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v6, v1, v5
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
-; GFX940-NEXT: v_perm_b32 v2, v2, v8, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v5, v0, v4
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
-; GFX940-NEXT: v_perm_b32 v1, v1, v7, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v5, 16, v5
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v6, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v8f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v8, v3, v7
+; GFX900-NEXT: v_mov_b32_e32 v9, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
+; GFX900-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v8, 16, v8
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
+; GFX900-NEXT: v_pk_max_f16 v7, v2, v6
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
+; GFX900-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v7, 16, v7
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
+; GFX900-NEXT: v_pk_max_f16 v6, v1, v5
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v6, 16, v6
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
+; GFX900-NEXT: v_pk_max_f16 v5, v0, v4
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v5, 16, v5
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v6, s4
+; GFX900-NEXT: v_perm_b32 v1, v1, v7, s4
+; GFX900-NEXT: v_perm_b32 v2, v2, v8, s4
+; GFX900-NEXT: v_perm_b32 v3, v3, v10, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v8f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v8, v3, v7
+; GFX950-NEXT: v_mov_b32_e32 v9, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v8, 16, v8
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v7, v2, v6
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
+; GFX950-NEXT: v_perm_b32 v3, v3, v10, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v7, 16, v7
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v6, v1, v5
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
+; GFX950-NEXT: v_perm_b32 v2, v2, v8, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v6, 16, v6
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v5, v0, v4
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
+; GFX950-NEXT: v_perm_b32 v1, v1, v7, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v5, 16, v5
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v6, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v8f16:
; GFX10: ; %bb.0:
@@ -2400,147 +2348,147 @@ define <16 x half> @v_maximum_v16f16(<16 x half> %src0, <16 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v16f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v16, v7, v15
-; GFX9-NEXT: v_mov_b32_e32 v17, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v16
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
-; GFX9-NEXT: v_pk_max_f16 v15, v6, v14
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v15, 16, v15
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
-; GFX9-NEXT: v_pk_max_f16 v14, v5, v13
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v14
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
-; GFX9-NEXT: v_pk_max_f16 v13, v4, v12
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v13, 16, v13
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
-; GFX9-NEXT: v_pk_max_f16 v12, v3, v11
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v12, 16, v12
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
-; GFX9-NEXT: v_pk_max_f16 v11, v2, v10
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v11, 16, v11
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
-; GFX9-NEXT: v_pk_max_f16 v10, v1, v9
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v10
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
-; GFX9-NEXT: v_pk_max_f16 v9, v0, v8
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v9
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v10, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v11, s4
-; GFX9-NEXT: v_perm_b32 v2, v2, v12, s4
-; GFX9-NEXT: v_perm_b32 v3, v3, v13, s4
-; GFX9-NEXT: v_perm_b32 v4, v4, v14, s4
-; GFX9-NEXT: v_perm_b32 v5, v5, v15, s4
-; GFX9-NEXT: v_perm_b32 v6, v6, v16, s4
-; GFX9-NEXT: v_perm_b32 v7, v7, v18, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v16f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v16, v7, v15
-; GFX940-NEXT: v_mov_b32_e32 v17, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v16, 16, v16
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v15, v6, v14
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
-; GFX940-NEXT: v_perm_b32 v7, v7, v18, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v15, 16, v15
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v14, v5, v13
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
-; GFX940-NEXT: v_perm_b32 v6, v6, v16, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v14, 16, v14
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v13, v4, v12
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
-; GFX940-NEXT: v_perm_b32 v5, v5, v15, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v13, 16, v13
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v12, v3, v11
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
-; GFX940-NEXT: v_perm_b32 v4, v4, v14, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v12, 16, v12
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v11, v2, v10
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
-; GFX940-NEXT: v_perm_b32 v3, v3, v13, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v11, 16, v11
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v10, v1, v9
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
-; GFX940-NEXT: v_perm_b32 v2, v2, v12, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v10, 16, v10
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v9, v0, v8
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
-; GFX940-NEXT: v_perm_b32 v1, v1, v11, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v9, 16, v9
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v10, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v16f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v16, v7, v15
+; GFX900-NEXT: v_mov_b32_e32 v17, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
+; GFX900-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v16, 16, v16
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
+; GFX900-NEXT: v_pk_max_f16 v15, v6, v14
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
+; GFX900-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v15, 16, v15
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
+; GFX900-NEXT: v_pk_max_f16 v14, v5, v13
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
+; GFX900-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v14, 16, v14
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
+; GFX900-NEXT: v_pk_max_f16 v13, v4, v12
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
+; GFX900-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v13, 16, v13
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
+; GFX900-NEXT: v_pk_max_f16 v12, v3, v11
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
+; GFX900-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v12, 16, v12
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
+; GFX900-NEXT: v_pk_max_f16 v11, v2, v10
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
+; GFX900-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v11, 16, v11
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
+; GFX900-NEXT: v_pk_max_f16 v10, v1, v9
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
+; GFX900-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v10, 16, v10
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
+; GFX900-NEXT: v_pk_max_f16 v9, v0, v8
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
+; GFX900-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v9, 16, v9
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v10, s4
+; GFX900-NEXT: v_perm_b32 v1, v1, v11, s4
+; GFX900-NEXT: v_perm_b32 v2, v2, v12, s4
+; GFX900-NEXT: v_perm_b32 v3, v3, v13, s4
+; GFX900-NEXT: v_perm_b32 v4, v4, v14, s4
+; GFX900-NEXT: v_perm_b32 v5, v5, v15, s4
+; GFX900-NEXT: v_perm_b32 v6, v6, v16, s4
+; GFX900-NEXT: v_perm_b32 v7, v7, v18, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v16f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v16, v7, v15
+; GFX950-NEXT: v_mov_b32_e32 v17, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v16, 16, v16
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v15, v6, v14
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
+; GFX950-NEXT: v_perm_b32 v7, v7, v18, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v15, 16, v15
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v14, v5, v13
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
+; GFX950-NEXT: v_perm_b32 v6, v6, v16, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v14, 16, v14
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v13, v4, v12
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
+; GFX950-NEXT: v_perm_b32 v5, v5, v15, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v13, 16, v13
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v12, v3, v11
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
+; GFX950-NEXT: v_perm_b32 v4, v4, v14, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v12, 16, v12
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v11, v2, v10
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
+; GFX950-NEXT: v_perm_b32 v3, v3, v13, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v11, 16, v11
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v10, v1, v9
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
+; GFX950-NEXT: v_perm_b32 v2, v2, v12, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v10, 16, v10
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v9, v0, v8
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
+; GFX950-NEXT: v_perm_b32 v1, v1, v11, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v9, 16, v9
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v10, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v16f16:
; GFX10: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
index 48851cb030233d..c1fdfa2c4cf9ab 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
@@ -2,7 +2,8 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
@@ -26,24 +27,24 @@ define float @v_maximum_f32(float %src0, float %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f32:
; GFX10: ; %bb.0:
@@ -94,12 +95,6 @@ define float @v_maximum_f32__nnan(float %src0, float %src1) {
; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_f32__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -144,24 +139,24 @@ define float @v_maximum_f32__nsz(float %src0, float %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f32__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f32__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f32__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f32__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f32__nsz:
; GFX10: ; %bb.0:
@@ -212,12 +207,6 @@ define float @v_maximum_f32__nnan_nsz(float %src0, float %src1) {
; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_f32__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -264,26 +253,26 @@ define float @v_maximum_f32__nnan_src0(float %arg0, float %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f32__nnan_src0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f32_e32 v0, 1.0, v0
-; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f32__nnan_src0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f32_e32 v0, 1.0, v0
-; GFX940-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f32__nnan_src0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX900-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f32__nnan_src0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX950-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f32__nnan_src0:
; GFX10: ; %bb.0:
@@ -341,26 +330,26 @@ define float @v_maximum_f32__nnan_src1(float %src0, float %arg1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f32__nnan_src1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1
-; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f32__nnan_src1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f32_e32 v1, 1.0, v1
-; GFX940-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f32__nnan_src1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f32_e32 v1, 1.0, v1
+; GFX900-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f32__nnan_src1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f32_e32 v1, 1.0, v1
+; GFX950-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f32__nnan_src1:
; GFX10: ; %bb.0:
@@ -424,32 +413,32 @@ define void @s_maximum_f32(float inreg %src0, float inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_maximum_f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s17
-; GFX9-NEXT: v_max_f32_e32 v1, s16, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v0
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_maximum_f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s1
-; GFX940-NEXT: v_max_f32_e32 v1, s0, v0
-; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v0
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_maximum_f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s17
+; GFX900-NEXT: v_max_f32_e32 v1, s16, v0
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v0
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_maximum_f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v0, s1
+; GFX950-NEXT: v_max_f32_e32 v1, s0, v0
+; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v0
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_maximum_f32:
; GFX10: ; %bb.0:
@@ -517,31 +506,31 @@ define <2 x float> @v_maximum_v2f32(<2 x float> %src0, <2 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v2f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v4, v0, v2
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_max_f32_e32 v2, v1, v3
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v2f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v4, v0, v2
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT: v_max_f32_e32 v2, v1, v3
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v4, v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX900-NEXT: v_max_f32_e32 v2, v1, v3
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v4, v0, v2
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX950-NEXT: v_max_f32_e32 v2, v1, v3
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f32:
; GFX10: ; %bb.0:
@@ -601,13 +590,6 @@ define <2 x float> @v_maximum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1)
; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v2f32__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX940-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v2f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -660,31 +642,31 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v2f32__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v4, v0, v2
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_max_f32_e32 v2, v1, v3
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v2f32__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v4, v0, v2
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT: v_max_f32_e32 v2, v1, v3
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f32__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v4, v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX900-NEXT: v_max_f32_e32 v2, v1, v3
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f32__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v4, v0, v2
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX950-NEXT: v_max_f32_e32 v2, v1, v3
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f32__nsz:
; GFX10: ; %bb.0:
@@ -744,13 +726,6 @@ define <2 x float> @v_maximum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr
; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v2f32__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX940-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v2f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -813,40 +788,40 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_maximum_v2f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s19
-; GFX9-NEXT: v_max_f32_e32 v1, s17, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s17, v0
-; GFX9-NEXT: v_mov_b32_e32 v0, s18
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, s16, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v[0:1]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_maximum_v2f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s3
-; GFX940-NEXT: v_max_f32_e32 v1, s1, v0
-; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s1, v0
-; GFX940-NEXT: v_mov_b32_e32 v0, s2
-; GFX940-NEXT: v_max_f32_e32 v3, s0, v0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v[0:1]
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_maximum_v2f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s19
+; GFX900-NEXT: v_max_f32_e32 v1, s17, v0
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, s17, v0
+; GFX900-NEXT: v_mov_b32_e32 v0, s18
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX900-NEXT: v_max_f32_e32 v3, s16, v0
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v[0:1]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_maximum_v2f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v0, s3
+; GFX950-NEXT: v_max_f32_e32 v1, s1, v0
+; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s1, v0
+; GFX950-NEXT: v_mov_b32_e32 v0, s2
+; GFX950-NEXT: v_max_f32_e32 v3, s0, v0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v[0:1]
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_maximum_v2f32:
; GFX10: ; %bb.0:
@@ -927,38 +902,38 @@ define <3 x float> @v_maximum_v3f32(<3 x float> %src0, <3 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v3f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v6, v0, v3
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, v1, v4
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, v2, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v3f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v6, v0, v3
-; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT: v_max_f32_e32 v3, v1, v4
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX940-NEXT: v_max_f32_e32 v3, v2, v5
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v6, v0, v3
+; GFX900-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX900-NEXT: v_max_f32_e32 v3, v1, v4
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX900-NEXT: v_max_f32_e32 v3, v2, v5
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v6, v0, v3
+; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX950-NEXT: v_max_f32_e32 v3, v1, v4
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX950-NEXT: v_max_f32_e32 v3, v2, v5
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f32:
; GFX10: ; %bb.0:
@@ -1028,14 +1003,6 @@ define <3 x float> @v_maximum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1)
; GFX9-NEXT: v_max_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v3f32__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX940-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX940-NEXT: v_max_f32_e32 v2, v2, v5
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v3f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1097,38 +1064,38 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v3f32__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v6, v0, v3
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, v1, v4
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, v2, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v3f32__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v6, v0, v3
-; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT: v_max_f32_e32 v3, v1, v4
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX940-NEXT: v_max_f32_e32 v3, v2, v5
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f32__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v6, v0, v3
+; GFX900-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX900-NEXT: v_max_f32_e32 v3, v1, v4
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX900-NEXT: v_max_f32_e32 v3, v2, v5
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f32__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v6, v0, v3
+; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX950-NEXT: v_max_f32_e32 v3, v1, v4
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX950-NEXT: v_max_f32_e32 v3, v2, v5
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f32__nsz:
; GFX10: ; %bb.0:
@@ -1198,14 +1165,6 @@ define <3 x float> @v_maximum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr
; GFX9-NEXT: v_max_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v3f32__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX940-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX940-NEXT: v_max_f32_e32 v2, v2, v5
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v3f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1273,45 +1232,45 @@ define <4 x float> @v_maximum_v4f32(<4 x float> %src0, <4 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v4f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v8, v0, v4
-; GFX9-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX9-NEXT: v_max_f32_e32 v4, v1, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX9-NEXT: v_max_f32_e32 v4, v2, v6
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX9-NEXT: v_max_f32_e32 v4, v3, v7
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v4f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v8, v0, v4
-; GFX940-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX940-NEXT: v_max_f32_e32 v4, v1, v5
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX940-NEXT: v_max_f32_e32 v4, v2, v6
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX940-NEXT: v_max_f32_e32 v4, v3, v7
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v8, v0, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX900-NEXT: v_max_f32_e32 v4, v1, v5
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX900-NEXT: v_max_f32_e32 v4, v2, v6
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX900-NEXT: v_max_f32_e32 v4, v3, v7
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v8, v0, v4
+; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX950-NEXT: v_max_f32_e32 v4, v1, v5
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX950-NEXT: v_max_f32_e32 v4, v2, v6
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX950-NEXT: v_max_f32_e32 v4, v3, v7
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f32:
; GFX10: ; %bb.0:
@@ -1391,15 +1350,6 @@ define <4 x float> @v_maximum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1)
; GFX9-NEXT: v_max_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v4f32__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX940-NEXT: v_max_f32_e32 v1, v1, v5
-; GFX940-NEXT: v_max_f32_e32 v2, v2, v6
-; GFX940-NEXT: v_max_f32_e32 v3, v3, v7
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v4f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1469,45 +1419,45 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v4f32__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v8, v0, v4
-; GFX9-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX9-NEXT: v_max_f32_e32 v4, v1, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX9-NEXT: v_max_f32_e32 v4, v2, v6
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX9-NEXT: v_max_f32_e32 v4, v3, v7
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v4f32__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v8, v0, v4
-; GFX940-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX940-NEXT: v_max_f32_e32 v4, v1, v5
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX940-NEXT: v_max_f32_e32 v4, v2, v6
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX940-NEXT: v_max_f32_e32 v4, v3, v7
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f32__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v8, v0, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX900-NEXT: v_max_f32_e32 v4, v1, v5
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX900-NEXT: v_max_f32_e32 v4, v2, v6
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX900-NEXT: v_max_f32_e32 v4, v3, v7
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f32__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v8, v0, v4
+; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX950-NEXT: v_max_f32_e32 v4, v1, v5
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX950-NEXT: v_max_f32_e32 v4, v2, v6
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX950-NEXT: v_max_f32_e32 v4, v3, v7
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f32__nsz:
; GFX10: ; %bb.0:
@@ -1587,15 +1537,6 @@ define <4 x float> @v_maximum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr
; GFX9-NEXT: v_max_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v4f32__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX940-NEXT: v_max_f32_e32 v1, v1, v5
-; GFX940-NEXT: v_max_f32_e32 v2, v2, v6
-; GFX940-NEXT: v_max_f32_e32 v3, v3, v7
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v4f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1689,73 +1630,73 @@ define <8 x float> @v_maximum_v8f32(<8 x float> %src0, <8 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v8f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v16, v0, v8
-; GFX9-NEXT: v_mov_b32_e32 v17, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX9-NEXT: v_max_f32_e32 v8, v1, v9
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
-; GFX9-NEXT: v_max_f32_e32 v8, v2, v10
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
-; GFX9-NEXT: v_max_f32_e32 v8, v3, v11
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
-; GFX9-NEXT: v_max_f32_e32 v8, v4, v12
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
-; GFX9-NEXT: v_max_f32_e32 v8, v5, v13
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
-; GFX9-NEXT: v_max_f32_e32 v8, v6, v14
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
-; GFX9-NEXT: v_max_f32_e32 v8, v7, v15
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v8f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v16, v0, v8
-; GFX940-NEXT: v_mov_b32_e32 v17, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX940-NEXT: v_max_f32_e32 v8, v1, v9
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
-; GFX940-NEXT: v_max_f32_e32 v8, v2, v10
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
-; GFX940-NEXT: v_max_f32_e32 v8, v3, v11
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
-; GFX940-NEXT: v_max_f32_e32 v8, v4, v12
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
-; GFX940-NEXT: v_max_f32_e32 v8, v5, v13
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
-; GFX940-NEXT: v_max_f32_e32 v8, v6, v14
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
-; GFX940-NEXT: v_max_f32_e32 v8, v7, v15
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v8f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v16, v0, v8
+; GFX900-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX900-NEXT: v_max_f32_e32 v8, v1, v9
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX900-NEXT: v_max_f32_e32 v8, v2, v10
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX900-NEXT: v_max_f32_e32 v8, v3, v11
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX900-NEXT: v_max_f32_e32 v8, v4, v12
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX900-NEXT: v_max_f32_e32 v8, v5, v13
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
+; GFX900-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX900-NEXT: v_max_f32_e32 v8, v6, v14
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX900-NEXT: v_max_f32_e32 v8, v7, v15
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
+; GFX900-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v8f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v16, v0, v8
+; GFX950-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
+; GFX950-NEXT: v_max_f32_e32 v8, v1, v9
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX950-NEXT: v_max_f32_e32 v8, v2, v10
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX950-NEXT: v_max_f32_e32 v8, v3, v11
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX950-NEXT: v_max_f32_e32 v8, v4, v12
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX950-NEXT: v_max_f32_e32 v8, v5, v13
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX950-NEXT: v_max_f32_e32 v8, v6, v14
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX950-NEXT: v_max_f32_e32 v8, v7, v15
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v8f32:
; GFX10: ; %bb.0:
@@ -1968,136 +1909,136 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v16f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v16
-; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32
-; GFX9-NEXT: v_writelane_b32 v31, s30, 0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v17
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v18
-; GFX9-NEXT: v_mov_b32_e32 v17, 0x7fc00000
-; GFX9-NEXT: v_max_f32_e32 v18, v13, v29
-; GFX9-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
-; GFX9-NEXT: v_writelane_b32 v31, s31, 1
-; GFX9-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
-; GFX9-NEXT: v_max_f32_e32 v3, v3, v19
-; GFX9-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
-; GFX9-NEXT: v_max_f32_e32 v4, v4, v20
-; GFX9-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
-; GFX9-NEXT: v_max_f32_e32 v5, v5, v21
-; GFX9-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
-; GFX9-NEXT: v_max_f32_e32 v6, v6, v22
-; GFX9-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
-; GFX9-NEXT: v_max_f32_e32 v7, v7, v23
-; GFX9-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
-; GFX9-NEXT: v_max_f32_e32 v8, v8, v24
-; GFX9-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
-; GFX9-NEXT: v_max_f32_e32 v9, v9, v25
-; GFX9-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
-; GFX9-NEXT: v_max_f32_e32 v10, v10, v26
-; GFX9-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
-; GFX9-NEXT: v_max_f32_e32 v11, v11, v27
-; GFX9-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
-; GFX9-NEXT: v_max_f32_e32 v12, v12, v28
-; GFX9-NEXT: v_max_f32_e32 v19, v14, v30
-; GFX9-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
-; GFX9-NEXT: v_readlane_b32 s31, v31, 1
-; GFX9-NEXT: v_readlane_b32 s30, v31, 0
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v18, v15, v16
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v16f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: v_mov_b32_e32 v32, 0x7fc00000
-; GFX940-NEXT: v_max_f32_e32 v33, v0, v16
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX940-NEXT: v_max_f32_e32 v34, v1, v17
-; GFX940-NEXT: v_max_f32_e32 v35, v2, v18
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX940-NEXT: v_max_f32_e32 v36, v3, v19
-; GFX940-NEXT: v_max_f32_e32 v37, v4, v20
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX940-NEXT: v_max_f32_e32 v38, v5, v21
-; GFX940-NEXT: v_max_f32_e32 v39, v6, v22
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX940-NEXT: v_max_f32_e32 v48, v7, v23
-; GFX940-NEXT: v_max_f32_e32 v49, v8, v24
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX940-NEXT: v_max_f32_e32 v50, v9, v25
-; GFX940-NEXT: v_max_f32_e32 v51, v10, v26
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX940-NEXT: v_max_f32_e32 v52, v11, v27
-; GFX940-NEXT: v_max_f32_e32 v53, v12, v28
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX940-NEXT: v_max_f32_e32 v54, v13, v29
-; GFX940-NEXT: v_max_f32_e32 v55, v14, v30
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v16, v15, v31
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v15, v31
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v16f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v16
+; GFX900-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX900-NEXT: v_writelane_b32 v31, s30, 0
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v17
+; GFX900-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
+; GFX900-NEXT: v_max_f32_e32 v2, v2, v18
+; GFX900-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX900-NEXT: v_max_f32_e32 v18, v13, v29
+; GFX900-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
+; GFX900-NEXT: v_writelane_b32 v31, s31, 1
+; GFX900-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
+; GFX900-NEXT: v_max_f32_e32 v3, v3, v19
+; GFX900-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
+; GFX900-NEXT: v_max_f32_e32 v4, v4, v20
+; GFX900-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
+; GFX900-NEXT: v_max_f32_e32 v5, v5, v21
+; GFX900-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
+; GFX900-NEXT: v_max_f32_e32 v6, v6, v22
+; GFX900-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
+; GFX900-NEXT: v_max_f32_e32 v7, v7, v23
+; GFX900-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
+; GFX900-NEXT: v_max_f32_e32 v8, v8, v24
+; GFX900-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
+; GFX900-NEXT: v_max_f32_e32 v9, v9, v25
+; GFX900-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
+; GFX900-NEXT: v_max_f32_e32 v10, v10, v26
+; GFX900-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
+; GFX900-NEXT: v_max_f32_e32 v11, v11, v27
+; GFX900-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
+; GFX900-NEXT: v_max_f32_e32 v12, v12, v28
+; GFX900-NEXT: v_max_f32_e32 v19, v14, v30
+; GFX900-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
+; GFX900-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
+; GFX900-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
+; GFX900-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
+; GFX900-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
+; GFX900-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
+; GFX900-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
+; GFX900-NEXT: v_readlane_b32 s31, v31, 1
+; GFX900-NEXT: v_readlane_b32 s30, v31, 0
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v18, v15, v16
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
+; GFX900-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v16f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: scratch_load_dword v31, off, s32
+; GFX950-NEXT: v_mov_b32_e32 v32, 0x7fc00000
+; GFX950-NEXT: v_max_f32_e32 v33, v0, v16
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
+; GFX950-NEXT: v_max_f32_e32 v34, v1, v17
+; GFX950-NEXT: v_max_f32_e32 v35, v2, v18
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
+; GFX950-NEXT: v_max_f32_e32 v36, v3, v19
+; GFX950-NEXT: v_max_f32_e32 v37, v4, v20
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
+; GFX950-NEXT: v_max_f32_e32 v38, v5, v21
+; GFX950-NEXT: v_max_f32_e32 v39, v6, v22
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
+; GFX950-NEXT: v_max_f32_e32 v48, v7, v23
+; GFX950-NEXT: v_max_f32_e32 v49, v8, v24
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
+; GFX950-NEXT: v_max_f32_e32 v50, v9, v25
+; GFX950-NEXT: v_max_f32_e32 v51, v10, v26
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
+; GFX950-NEXT: v_max_f32_e32 v52, v11, v27
+; GFX950-NEXT: v_max_f32_e32 v53, v12, v28
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
+; GFX950-NEXT: v_max_f32_e32 v54, v13, v29
+; GFX950-NEXT: v_max_f32_e32 v55, v14, v30
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v16, v15, v31
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v15, v31
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v16f32:
; GFX10: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
index 80a0a194713d90..e354ec6fb3dd78 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
@@ -2,7 +2,8 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
@@ -28,26 +29,26 @@ define double @v_maximum_f64(double %src0, double %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f64:
; GFX10: ; %bb.0:
@@ -100,12 +101,6 @@ define double @v_maximum_f64__nnan(double %src0, double %src1) {
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_f64__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -152,26 +147,26 @@ define double @v_maximum_f64__nsz(double %src0, double %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f64__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f64__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f64__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f64__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f64__nsz:
; GFX10: ; %bb.0:
@@ -224,12 +219,6 @@ define double @v_maximum_f64__nnan_nsz(double %src0, double %src1) {
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_f64__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -278,28 +267,28 @@ define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f64__nnan_src0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
-; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f64__nnan_src0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
-; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f64__nnan_src0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
+; GFX900-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f64__nnan_src0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
+; GFX950-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f64__nnan_src0:
; GFX10: ; %bb.0:
@@ -362,28 +351,28 @@ define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f64__nnan_src1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
-; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f64__nnan_src1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
-; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f64__nnan_src1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
+; GFX900-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f64__nnan_src1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
+; GFX950-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f64__nnan_src1:
; GFX10: ; %bb.0:
@@ -454,35 +443,35 @@ define void @s_maximum_f64(double inreg %src0, double inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_maximum_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s18
-; GFX9-NEXT: v_mov_b32_e32 v1, s19
-; GFX9-NEXT: v_max_f64 v[2:3], s[16:17], v[0:1]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v[0:1]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_maximum_f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
-; GFX940-NEXT: v_max_f64 v[2:3], s[0:1], v[0:1]
-; GFX940-NEXT: v_mov_b32_e32 v4, 0x7ff80000
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v[0:1]
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_maximum_f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s18
+; GFX900-NEXT: v_mov_b32_e32 v1, s19
+; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], v[0:1]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
+; GFX900-NEXT: v_mov_b32_e32 v4, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v[0:1]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_maximum_f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], v[0:1]
+; GFX950-NEXT: v_mov_b32_e32 v4, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v[0:1]
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_maximum_f64:
; GFX10: ; %bb.0:
@@ -555,35 +544,35 @@ define <2 x double> @v_maximum_v2f64(<2 x double> %src0, <2 x double> %src1) {
; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v2f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v2f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX900-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64:
; GFX10: ; %bb.0:
@@ -648,13 +637,6 @@ define <2 x double> @v_maximum_v2f64__nnan(<2 x double> %src0, <2 x double> %src
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v2f64__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v2f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -712,35 +694,35 @@ define <2 x double> @v_maximum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v2f64__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v2f64__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f64__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX900-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f64__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64__nsz:
; GFX10: ; %bb.0:
@@ -805,13 +787,6 @@ define <2 x double> @v_maximum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double>
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v2f64__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -883,46 +858,46 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_maximum_v2f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s22
-; GFX9-NEXT: v_mov_b32_e32 v4, s20
-; GFX9-NEXT: v_mov_b32_e32 v1, s23
-; GFX9-NEXT: v_mov_b32_e32 v5, s21
-; GFX9-NEXT: v_max_f64 v[2:3], s[18:19], v[0:1]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[18:19], v[0:1]
-; GFX9-NEXT: v_max_f64 v[0:1], s[16:17], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], s[16:17], v[4:5]
-; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v[0:3]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_maximum_v2f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[18:19]
-; GFX940-NEXT: v_max_f64 v[2:3], s[2:3], v[0:1]
-; GFX940-NEXT: v_mov_b32_e32 v6, 0x7ff80000
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[2:3], v[0:1]
-; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[16:17]
-; GFX940-NEXT: v_max_f64 v[4:5], s[0:1], v[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v[0:3]
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_maximum_v2f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s22
+; GFX900-NEXT: v_mov_b32_e32 v4, s20
+; GFX900-NEXT: v_mov_b32_e32 v1, s23
+; GFX900-NEXT: v_mov_b32_e32 v5, s21
+; GFX900-NEXT: v_max_f64 v[2:3], s[18:19], v[0:1]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, s[18:19], v[0:1]
+; GFX900-NEXT: v_max_f64 v[0:1], s[16:17], v[4:5]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], s[16:17], v[4:5]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_maximum_v2f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[18:19]
+; GFX950-NEXT: v_max_f64 v[2:3], s[2:3], v[0:1]
+; GFX950-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[2:3], v[0:1]
+; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[16:17]
+; GFX950-NEXT: v_max_f64 v[4:5], s[0:1], v[0:1]
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v[0:3]
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_maximum_v2f64:
; GFX10: ; %bb.0:
@@ -1012,44 +987,44 @@ define <3 x double> @v_maximum_v3f64(<3 x double> %src0, <3 x double> %src1) {
; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v3f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX9-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v3f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v12, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX940-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX900-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX900-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX950-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX950-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64:
; GFX10: ; %bb.0:
@@ -1125,14 +1100,6 @@ define <3 x double> @v_maximum_v3f64__nnan(<3 x double> %src0, <3 x double> %src
; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v3f64__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v3f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1201,44 +1168,44 @@ define <3 x double> @v_maximum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v3f64__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX9-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v3f64__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v12, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX940-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f64__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX900-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX900-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f64__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX950-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX950-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64__nsz:
; GFX10: ; %bb.0:
@@ -1314,14 +1281,6 @@ define <3 x double> @v_maximum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double>
; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v3f64__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1398,53 +1357,53 @@ define <4 x double> @v_maximum_v4f64(<4 x double> %src0, <4 x double> %src1) {
; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v4f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX9-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX9-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v4f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v16, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX940-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX940-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX900-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX900-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX900-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX900-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX950-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX950-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64:
; GFX10: ; %bb.0:
@@ -1532,15 +1491,6 @@ define <4 x double> @v_maximum_v4f64__nnan(<4 x double> %src0, <4 x double> %src
; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v4f64__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
-; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
-; GFX940-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v4f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1620,53 +1570,53 @@ define <4 x double> @v_maximum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v4f64__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX9-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX9-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v4f64__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v16, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX940-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX940-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f64__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX900-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX900-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX900-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX900-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f64__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX950-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX950-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64__nsz:
; GFX10: ; %bb.0:
@@ -1754,15 +1704,6 @@ define <4 x double> @v_maximum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double>
; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v4f64__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
-; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
-; GFX940-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1878,89 +1819,89 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v8f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX9-NEXT: v_max_f64 v[32:33], v[2:3], v[18:19]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
-; GFX9-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
-; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
-; GFX9-NEXT: v_mov_b32_e32 v34, 0x7ff80000
-; GFX9-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
-; GFX9-NEXT: v_max_f64 v[16:17], v[8:9], v[24:25]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
-; GFX9-NEXT: v_max_f64 v[22:23], v[10:11], v[26:27]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
-; GFX9-NEXT: v_max_f64 v[24:25], v[12:13], v[28:29]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_max_f64 v[18:19], v[14:15], v[30:31]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v8f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: v_mov_b32_e32 v54, 0x7ff80000
-; GFX940-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX940-NEXT: v_max_f64 v[34:35], v[2:3], v[18:19]
-; GFX940-NEXT: v_max_f64 v[36:37], v[4:5], v[20:21]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
-; GFX940-NEXT: v_max_f64 v[38:39], v[6:7], v[22:23]
-; GFX940-NEXT: v_max_f64 v[48:49], v[8:9], v[24:25]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
-; GFX940-NEXT: v_max_f64 v[50:51], v[10:11], v[26:27]
-; GFX940-NEXT: v_max_f64 v[52:53], v[12:13], v[28:29]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_max_f64 v[16:17], v[14:15], v[30:31]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v8f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX900-NEXT: v_max_f64 v[32:33], v[2:3], v[18:19]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX900-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
+; GFX900-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v34, 0x7ff80000
+; GFX900-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
+; GFX900-NEXT: v_max_f64 v[16:17], v[8:9], v[24:25]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX900-NEXT: v_max_f64 v[22:23], v[10:11], v[26:27]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX900-NEXT: v_max_f64 v[24:25], v[12:13], v[28:29]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
+; GFX900-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
+; GFX900-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
+; GFX900-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
+; GFX900-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
+; GFX900-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_max_f64 v[18:19], v[14:15], v[30:31]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v8f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: scratch_load_dword v31, off, s32
+; GFX950-NEXT: v_mov_b32_e32 v54, 0x7ff80000
+; GFX950-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX950-NEXT: v_max_f64 v[34:35], v[2:3], v[18:19]
+; GFX950-NEXT: v_max_f64 v[36:37], v[4:5], v[20:21]
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX950-NEXT: v_max_f64 v[38:39], v[6:7], v[22:23]
+; GFX950-NEXT: v_max_f64 v[48:49], v[8:9], v[24:25]
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
+; GFX950-NEXT: v_max_f64 v[50:51], v[10:11], v[26:27]
+; GFX950-NEXT: v_max_f64 v[52:53], v[12:13], v[28:29]
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_max_f64 v[16:17], v[14:15], v[30:31]
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v8f64:
; GFX10: ; %bb.0:
@@ -2332,295 +2273,295 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v16f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX9-NEXT: v_writelane_b32 v34, s30, 0
-; GFX9-NEXT: v_writelane_b32 v34, s31, 1
-; GFX9-NEXT: v_writelane_b32 v34, s34, 2
-; GFX9-NEXT: v_writelane_b32 v34, s35, 3
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
-; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32]
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
-; GFX9-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
-; GFX9-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
-; GFX9-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
-; GFX9-NEXT: v_max_f64 v[14:15], v[14:15], v[31:32]
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
-; GFX9-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
-; GFX9-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX9-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
-; GFX9-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
-; GFX9-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
-; GFX9-NEXT: v_max_f64 v[22:23], v[22:23], v[31:32]
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
-; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
-; GFX9-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
-; GFX9-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
-; GFX9-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
-; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
-; GFX9-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
-; GFX9-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
-; GFX9-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
-; GFX9-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
-; GFX9-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
-; GFX9-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
-; GFX9-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX9-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
-; GFX9-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
-; GFX9-NEXT: v_readlane_b32 s35, v34, 3
-; GFX9-NEXT: v_readlane_b32 s34, v34, 2
-; GFX9-NEXT: v_readlane_b32 s31, v34, 1
-; GFX9-NEXT: v_readlane_b32 s30, v34, 0
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v16f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_accvgpr_write_b32 a1, v40 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a2, v41 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a3, v42 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a4, v43 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a5, v44 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a6, v45 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a7, v46 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a8, v47 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a9, v56 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a10, v57 ; Reload Reuse
-; GFX940-NEXT: scratch_load_dword v37, off, s32 offset:16
-; GFX940-NEXT: scratch_load_dword v36, off, s32 offset:12
-; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:24
-; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:20
-; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:32
-; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:28
-; GFX940-NEXT: scratch_load_dword v57, off, s32 offset:8
-; GFX940-NEXT: scratch_load_dword v56, off, s32 offset:4
-; GFX940-NEXT: scratch_load_dword v47, off, s32 offset:40
-; GFX940-NEXT: scratch_load_dword v46, off, s32 offset:36
-; GFX940-NEXT: scratch_load_dword v45, off, s32 offset:48
-; GFX940-NEXT: scratch_load_dword v44, off, s32 offset:44
-; GFX940-NEXT: scratch_load_dword v43, off, s32 offset:56
-; GFX940-NEXT: scratch_load_dword v42, off, s32 offset:52
-; GFX940-NEXT: scratch_load_dword v41, off, s32 offset:64
-; GFX940-NEXT: scratch_load_dword v40, off, s32 offset:60
-; GFX940-NEXT: scratch_load_dword v55, off, s32 offset:72
-; GFX940-NEXT: scratch_load_dword v54, off, s32 offset:68
-; GFX940-NEXT: scratch_load_dword v53, off, s32 offset:80
-; GFX940-NEXT: scratch_load_dword v52, off, s32 offset:76
-; GFX940-NEXT: scratch_load_dword v51, off, s32 offset:88
-; GFX940-NEXT: scratch_load_dword v50, off, s32 offset:84
-; GFX940-NEXT: scratch_load_dword v35, off, s32 offset:96
-; GFX940-NEXT: scratch_load_dword v34, off, s32 offset:92
-; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: scratch_load_dword v33, off, s32 offset:104
-; GFX940-NEXT: scratch_load_dword v32, off, s32 offset:100
-; GFX940-NEXT: v_accvgpr_write_b32 a11, v58 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a12, v59 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a13, v60 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a14, v61 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a15, v62 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a16, v63 ; Reload Reuse
-; GFX940-NEXT: s_waitcnt vmcnt(25)
-; GFX940-NEXT: v_max_f64 v[58:59], v[2:3], v[36:37]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[36:37]
-; GFX940-NEXT: scratch_load_dword v37, off, s32 offset:112
-; GFX940-NEXT: scratch_load_dword v36, off, s32 offset:108
-; GFX940-NEXT: s_waitcnt vmcnt(25)
-; GFX940-NEXT: v_max_f64 v[60:61], v[4:5], v[38:39]
-; GFX940-NEXT: v_cmp_u_f64_e64 s[0:1], v[4:5], v[38:39]
-; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:120
-; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:116
-; GFX940-NEXT: s_waitcnt vmcnt(25)
-; GFX940-NEXT: v_max_f64 v[62:63], v[6:7], v[48:49]
-; GFX940-NEXT: v_cmp_u_f64_e64 s[2:3], v[6:7], v[48:49]
-; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:128
-; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:124
-; GFX940-NEXT: s_waitcnt vmcnt(25)
-; GFX940-NEXT: v_max_f64 v[2:3], v[0:1], v[56:57]
-; GFX940-NEXT: v_cmp_u_f64_e64 s[4:5], v[0:1], v[56:57]
-; GFX940-NEXT: v_mov_b32_e32 v0, 0x7ff80000
-; GFX940-NEXT: s_waitcnt vmcnt(23)
-; GFX940-NEXT: v_max_f64 v[56:57], v[8:9], v[46:47]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v2, 0, s[4:5]
-; GFX940-NEXT: v_accvgpr_write_b32 a0, v1
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v3, v0, s[4:5]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v58, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v59, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
-; GFX940-NEXT: s_waitcnt vmcnt(21)
-; GFX940-NEXT: v_max_f64 v[46:47], v[10:11], v[44:45]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v60, 0, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v57, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
-; GFX940-NEXT: s_waitcnt vmcnt(19)
-; GFX940-NEXT: v_max_f64 v[44:45], v[12:13], v[42:43]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v61, v0, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v46, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v47, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
-; GFX940-NEXT: s_waitcnt vmcnt(17)
-; GFX940-NEXT: v_max_f64 v[42:43], v[14:15], v[40:41]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v62, 0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v44, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v45, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
-; GFX940-NEXT: s_waitcnt vmcnt(15)
-; GFX940-NEXT: v_max_f64 v[40:41], v[16:17], v[54:55]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v63, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v42, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v43, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
-; GFX940-NEXT: s_waitcnt vmcnt(13)
-; GFX940-NEXT: v_max_f64 v[54:55], v[18:19], v[52:53]
-; GFX940-NEXT: v_accvgpr_read_b32 v63, a16 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v16, v40, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v17, v41, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
-; GFX940-NEXT: s_waitcnt vmcnt(11)
-; GFX940-NEXT: v_max_f64 v[52:53], v[20:21], v[50:51]
-; GFX940-NEXT: v_accvgpr_read_b32 v62, a15 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v18, v54, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v19, v55, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
-; GFX940-NEXT: s_waitcnt vmcnt(9)
-; GFX940-NEXT: v_max_f64 v[50:51], v[22:23], v[34:35]
-; GFX940-NEXT: v_accvgpr_read_b32 v61, a14 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v20, v52, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v21, v53, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[34:35]
-; GFX940-NEXT: s_waitcnt vmcnt(6)
-; GFX940-NEXT: v_max_f64 v[34:35], v[24:25], v[32:33]
-; GFX940-NEXT: v_accvgpr_read_b32 v60, a13 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v22, v50, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v23, v51, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[32:33]
-; GFX940-NEXT: v_accvgpr_read_b32 v59, a12 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v58, a11 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v24, v34, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v25, v35, v0, vcc
-; GFX940-NEXT: v_accvgpr_read_b32 v57, a10 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v56, a9 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v47, a8 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v46, a7 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v45, a6 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v44, a5 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v43, a4 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v42, a3 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v41, a2 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v40, a1 ; Reload Reuse
-; GFX940-NEXT: s_waitcnt vmcnt(4)
-; GFX940-NEXT: v_max_f64 v[32:33], v[26:27], v[36:37]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[36:37]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v26, v32, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v27, v33, v0, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(2)
-; GFX940-NEXT: v_max_f64 v[32:33], v[28:29], v[38:39]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[38:39]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v28, v32, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v29, v33, v0, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_max_f64 v[32:33], v[30:31], v[48:49]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[48:49]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v30, v32, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v31, v33, v0, vcc
-; GFX940-NEXT: v_accvgpr_read_b32 v0, a0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v16f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
+; GFX900-NEXT: v_writelane_b32 v34, s30, 0
+; GFX900-NEXT: v_writelane_b32 v34, s31, 1
+; GFX900-NEXT: v_writelane_b32 v34, s34, 2
+; GFX900-NEXT: v_writelane_b32 v34, s35, 3
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32]
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX900-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX900-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX900-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
+; GFX900-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX900-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX900-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX900-NEXT: v_max_f64 v[14:15], v[14:15], v[31:32]
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
+; GFX900-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX900-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX900-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX900-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
+; GFX900-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX900-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX900-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX900-NEXT: v_max_f64 v[22:23], v[22:23], v[31:32]
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
+; GFX900-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX900-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX900-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX900-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX900-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX900-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX900-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
+; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX900-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
+; GFX900-NEXT: v_mov_b32_e32 v32, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
+; GFX900-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
+; GFX900-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
+; GFX900-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
+; GFX900-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
+; GFX900-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
+; GFX900-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
+; GFX900-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
+; GFX900-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
+; GFX900-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
+; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
+; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
+; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
+; GFX900-NEXT: v_readlane_b32 s35, v34, 3
+; GFX900-NEXT: v_readlane_b32 s34, v34, 2
+; GFX900-NEXT: v_readlane_b32 s31, v34, 1
+; GFX900-NEXT: v_readlane_b32 s30, v34, 0
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v16f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_accvgpr_write_b32 a1, v40 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a2, v41 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a3, v42 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a4, v43 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a5, v44 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a6, v45 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a7, v46 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a8, v47 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a9, v56 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a10, v57 ; Reload Reuse
+; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:16
+; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:12
+; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:24
+; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:20
+; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:32
+; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:28
+; GFX950-NEXT: scratch_load_dword v57, off, s32 offset:8
+; GFX950-NEXT: scratch_load_dword v56, off, s32 offset:4
+; GFX950-NEXT: scratch_load_dword v47, off, s32 offset:40
+; GFX950-NEXT: scratch_load_dword v46, off, s32 offset:36
+; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:48
+; GFX950-NEXT: scratch_load_dword v44, off, s32 offset:44
+; GFX950-NEXT: scratch_load_dword v43, off, s32 offset:56
+; GFX950-NEXT: scratch_load_dword v42, off, s32 offset:52
+; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:64
+; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:60
+; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:72
+; GFX950-NEXT: scratch_load_dword v54, off, s32 offset:68
+; GFX950-NEXT: scratch_load_dword v53, off, s32 offset:80
+; GFX950-NEXT: scratch_load_dword v52, off, s32 offset:76
+; GFX950-NEXT: scratch_load_dword v51, off, s32 offset:88
+; GFX950-NEXT: scratch_load_dword v50, off, s32 offset:84
+; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:96
+; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:92
+; GFX950-NEXT: scratch_load_dword v31, off, s32
+; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:104
+; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:100
+; GFX950-NEXT: v_accvgpr_write_b32 a11, v58 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a12, v59 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a13, v60 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a14, v61 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a15, v62 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a16, v63 ; Reload Reuse
+; GFX950-NEXT: s_waitcnt vmcnt(25)
+; GFX950-NEXT: v_max_f64 v[58:59], v[2:3], v[36:37]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[36:37]
+; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:112
+; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:108
+; GFX950-NEXT: s_waitcnt vmcnt(25)
+; GFX950-NEXT: v_max_f64 v[60:61], v[4:5], v[38:39]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[0:1], v[4:5], v[38:39]
+; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:120
+; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:116
+; GFX950-NEXT: s_waitcnt vmcnt(25)
+; GFX950-NEXT: v_max_f64 v[62:63], v[6:7], v[48:49]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[2:3], v[6:7], v[48:49]
+; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:128
+; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:124
+; GFX950-NEXT: s_waitcnt vmcnt(25)
+; GFX950-NEXT: v_max_f64 v[2:3], v[0:1], v[56:57]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[4:5], v[0:1], v[56:57]
+; GFX950-NEXT: v_mov_b32_e32 v0, 0x7ff80000
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_max_f64 v[56:57], v[8:9], v[46:47]
+; GFX950-NEXT: v_cndmask_b32_e64 v1, v2, 0, s[4:5]
+; GFX950-NEXT: v_accvgpr_write_b32 a0, v1
+; GFX950-NEXT: v_cndmask_b32_e64 v1, v3, v0, s[4:5]
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v58, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v59, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
+; GFX950-NEXT: s_waitcnt vmcnt(21)
+; GFX950-NEXT: v_max_f64 v[46:47], v[10:11], v[44:45]
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v60, 0, s[0:1]
+; GFX950-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v57, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
+; GFX950-NEXT: s_waitcnt vmcnt(19)
+; GFX950-NEXT: v_max_f64 v[44:45], v[12:13], v[42:43]
+; GFX950-NEXT: v_cndmask_b32_e64 v5, v61, v0, s[0:1]
+; GFX950-NEXT: v_cndmask_b32_e64 v10, v46, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v47, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
+; GFX950-NEXT: s_waitcnt vmcnt(17)
+; GFX950-NEXT: v_max_f64 v[42:43], v[14:15], v[40:41]
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v62, 0, s[2:3]
+; GFX950-NEXT: v_cndmask_b32_e64 v12, v44, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v45, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
+; GFX950-NEXT: s_waitcnt vmcnt(15)
+; GFX950-NEXT: v_max_f64 v[40:41], v[16:17], v[54:55]
+; GFX950-NEXT: v_cndmask_b32_e64 v7, v63, v0, s[2:3]
+; GFX950-NEXT: v_cndmask_b32_e64 v14, v42, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v43, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
+; GFX950-NEXT: s_waitcnt vmcnt(13)
+; GFX950-NEXT: v_max_f64 v[54:55], v[18:19], v[52:53]
+; GFX950-NEXT: v_accvgpr_read_b32 v63, a16 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v16, v40, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v17, v41, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
+; GFX950-NEXT: s_waitcnt vmcnt(11)
+; GFX950-NEXT: v_max_f64 v[52:53], v[20:21], v[50:51]
+; GFX950-NEXT: v_accvgpr_read_b32 v62, a15 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v18, v54, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v19, v55, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
+; GFX950-NEXT: s_waitcnt vmcnt(9)
+; GFX950-NEXT: v_max_f64 v[50:51], v[22:23], v[34:35]
+; GFX950-NEXT: v_accvgpr_read_b32 v61, a14 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v20, v52, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v21, v53, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[34:35]
+; GFX950-NEXT: s_waitcnt vmcnt(6)
+; GFX950-NEXT: v_max_f64 v[34:35], v[24:25], v[32:33]
+; GFX950-NEXT: v_accvgpr_read_b32 v60, a13 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v22, v50, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v23, v51, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[32:33]
+; GFX950-NEXT: v_accvgpr_read_b32 v59, a12 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v58, a11 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v24, v34, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v25, v35, v0, vcc
+; GFX950-NEXT: v_accvgpr_read_b32 v57, a10 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v56, a9 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v47, a8 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v46, a7 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v45, a6 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v44, a5 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v43, a4 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v42, a3 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v41, a2 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v40, a1 ; Reload Reuse
+; GFX950-NEXT: s_waitcnt vmcnt(4)
+; GFX950-NEXT: v_max_f64 v[32:33], v[26:27], v[36:37]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[36:37]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v26, v32, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v27, v33, v0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(2)
+; GFX950-NEXT: v_max_f64 v[32:33], v[28:29], v[38:39]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[38:39]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v28, v32, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v29, v33, v0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_max_f64 v[32:33], v[30:31], v[48:49]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[48:49]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v30, v32, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v31, v33, v0, vcc
+; GFX950-NEXT: v_accvgpr_read_b32 v0, a0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v16f64:
; GFX10: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
index a74043378a2598..329a85f91c2514 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
@@ -2,7 +2,8 @@
; xUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
@@ -17,24 +18,24 @@ define half @v_minimum_f16(half %src0, half %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f16_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f16_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f16_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f16_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f16:
; GFX10: ; %bb.0:
@@ -79,12 +80,6 @@ define half @v_minimum_f16__nnan(half %src0, half %src1) {
; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_f16__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -120,24 +115,24 @@ define half @v_minimum_f16__nsz(half %src0, half %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f16__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f16_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f16__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f16_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f16__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f16_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f16__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f16_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f16__nsz:
; GFX10: ; %bb.0:
@@ -182,12 +177,6 @@ define half @v_minimum_f16__nnan_nsz(half %src0, half %src1) {
; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_f16__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_f16__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -224,26 +213,26 @@ define half @v_minimum_f16__nnan_src0(half %arg0, half %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f16__nnan_src0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f16_e32 v0, 1.0, v0
-; GFX9-NEXT: v_min_f16_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f16__nnan_src0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f16_e32 v0, 1.0, v0
-; GFX940-NEXT: v_min_f16_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f16__nnan_src0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f16_e32 v0, 1.0, v0
+; GFX900-NEXT: v_min_f16_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f16__nnan_src0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f16_e32 v0, 1.0, v0
+; GFX950-NEXT: v_min_f16_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f16__nnan_src0:
; GFX10: ; %bb.0:
@@ -291,26 +280,26 @@ define half @v_minimum_f16__nnan_src1(half %src0, half %arg1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f16__nnan_src1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f16_e32 v1, 1.0, v1
-; GFX9-NEXT: v_min_f16_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f16__nnan_src1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f16_e32 v1, 1.0, v1
-; GFX940-NEXT: v_min_f16_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f16__nnan_src1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f16_e32 v1, 1.0, v1
+; GFX900-NEXT: v_min_f16_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f16__nnan_src1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f16_e32 v1, 1.0, v1
+; GFX950-NEXT: v_min_f16_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f16__nnan_src1:
; GFX10: ; %bb.0:
@@ -362,34 +351,34 @@ define void @s_minimum_f16(half inreg %src0, half inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_minimum_f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s17
-; GFX9-NEXT: v_min_f16_e32 v1, s16, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v0
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_minimum_f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s1
-; GFX940-NEXT: v_min_f16_e32 v1, s0, v0
-; GFX940-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v0
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_minimum_f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s17
+; GFX900-NEXT: v_min_f16_e32 v1, s16, v0
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v0
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_minimum_f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v0, s1
+; GFX950-NEXT: v_min_f16_e32 v1, s0, v0
+; GFX950-NEXT: v_mov_b32_e32 v2, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX950-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v0
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_minimum_f16:
; GFX10: ; %bb.0:
@@ -456,35 +445,35 @@ define <2 x half> @v_minimum_v2f16(<2 x half> %src0, <2 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v2f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v2f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f16:
; GFX10: ; %bb.0:
@@ -542,12 +531,6 @@ define <2 x half> @v_minimum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v2f16__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v2f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -590,35 +573,35 @@ define <2 x half> @v_minimum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v2f16__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v2f16__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f16__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f16__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f16__nsz:
; GFX10: ; %bb.0:
@@ -676,12 +659,6 @@ define <2 x half> @v_minimum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1)
; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v2f16__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v2f16__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -729,50 +706,50 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_minimum_v2f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s17
-; GFX9-NEXT: v_mov_b32_e32 v1, s17
-; GFX9-NEXT: s_lshr_b32 s4, s17, 16
-; GFX9-NEXT: v_pk_min_f16 v1, s16, v1
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
-; GFX9-NEXT: s_lshr_b32 s5, s16, 16
-; GFX9-NEXT: v_mov_b32_e32 v3, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s5, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v0
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_minimum_v2f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s1
-; GFX940-NEXT: v_mov_b32_e32 v1, s1
-; GFX940-NEXT: s_lshr_b32 s1, s1, 16
-; GFX940-NEXT: v_pk_min_f16 v1, s0, v1
-; GFX940-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
-; GFX940-NEXT: s_lshr_b32 s0, s0, 16
-; GFX940-NEXT: v_mov_b32_e32 v3, s1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, s0, v3
-; GFX940-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v0
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_minimum_v2f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s17
+; GFX900-NEXT: v_mov_b32_e32 v1, s17
+; GFX900-NEXT: s_lshr_b32 s4, s17, 16
+; GFX900-NEXT: v_pk_min_f16 v1, s16, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
+; GFX900-NEXT: s_lshr_b32 s5, s16, 16
+; GFX900-NEXT: v_mov_b32_e32 v3, s4
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s5, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v0
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_minimum_v2f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v0, s1
+; GFX950-NEXT: v_mov_b32_e32 v1, s1
+; GFX950-NEXT: s_lshr_b32 s1, s1, 16
+; GFX950-NEXT: v_pk_min_f16 v1, s0, v1
+; GFX950-NEXT: v_mov_b32_e32 v2, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
+; GFX950-NEXT: s_lshr_b32 s0, s0, 16
+; GFX950-NEXT: v_mov_b32_e32 v3, s1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, s0, v3
+; GFX950-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX950-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v0
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_minimum_v2f16:
; GFX10: ; %bb.0:
@@ -850,41 +827,41 @@ define <3 x half> @v_minimum_v3f16(<3 x half> %src0, <3 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v3f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v4, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_pk_min_f16 v3, v0, v2
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v3f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v4, v1, v3
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT: v_pk_min_f16 v3, v0, v2
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v4, v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX900-NEXT: v_pk_min_f16 v3, v0, v2
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v4, v1, v3
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX950-NEXT: v_pk_min_f16 v3, v0, v2
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f16:
; GFX10: ; %bb.0:
@@ -952,13 +929,6 @@ define <3 x half> @v_minimum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v3f16__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX940-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v3f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1007,41 +977,41 @@ define <3 x half> @v_minimum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v3f16__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v4, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_pk_min_f16 v3, v0, v2
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v3f16__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v4, v1, v3
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT: v_pk_min_f16 v3, v0, v2
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f16__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v4, v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX900-NEXT: v_pk_min_f16 v3, v0, v2
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f16__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v4, v1, v3
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX950-NEXT: v_pk_min_f16 v3, v0, v2
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f16__nsz:
; GFX10: ; %bb.0:
@@ -1109,13 +1079,6 @@ define <3 x half> @v_minimum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1)
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v3f16__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX940-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v3f16__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1171,51 +1134,51 @@ define <4 x half> @v_minimum_v4f16(<4 x half> %src0, <4 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v4f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v4, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_pk_min_f16 v3, v0, v2
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v4f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v4, v1, v3
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v3, v0, v2
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v4, v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX900-NEXT: v_pk_min_f16 v3, v0, v2
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v4, v1, v3
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v3, v0, v2
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX950-NEXT: v_perm_b32 v1, v1, v6, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f16:
; GFX10: ; %bb.0:
@@ -1294,13 +1257,6 @@ define <4 x half> @v_minimum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) {
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v4f16__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX940-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v4f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1356,51 +1312,51 @@ define <4 x half> @v_minimum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v4f16__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v4, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_pk_min_f16 v3, v0, v2
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v4f16__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v4, v1, v3
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v3, v0, v2
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f16__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v4, v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX900-NEXT: v_pk_min_f16 v3, v0, v2
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f16__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v4, v1, v3
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v3, v0, v2
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX950-NEXT: v_perm_b32 v1, v1, v6, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f16__nsz:
; GFX10: ; %bb.0:
@@ -1479,13 +1435,6 @@ define <4 x half> @v_minimum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1)
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v4f16__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX940-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v4f16__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1561,83 +1510,83 @@ define <8 x half> @v_minimum_v8f16(<8 x half> %src0, <8 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v8f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v8, v3, v7
-; GFX9-NEXT: v_mov_b32_e32 v9, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v8
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
-; GFX9-NEXT: v_pk_min_f16 v7, v2, v6
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v7
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
-; GFX9-NEXT: v_pk_min_f16 v6, v1, v5
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
-; GFX9-NEXT: v_pk_min_f16 v5, v0, v4
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v5
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v6, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v7, s4
-; GFX9-NEXT: v_perm_b32 v2, v2, v8, s4
-; GFX9-NEXT: v_perm_b32 v3, v3, v10, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v8f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v8, v3, v7
-; GFX940-NEXT: v_mov_b32_e32 v9, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v8, 16, v8
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v7, v2, v6
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
-; GFX940-NEXT: v_perm_b32 v3, v3, v10, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v7, 16, v7
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v6, v1, v5
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
-; GFX940-NEXT: v_perm_b32 v2, v2, v8, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v5, v0, v4
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
-; GFX940-NEXT: v_perm_b32 v1, v1, v7, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v5, 16, v5
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v6, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v8f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v8, v3, v7
+; GFX900-NEXT: v_mov_b32_e32 v9, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
+; GFX900-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v8, 16, v8
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
+; GFX900-NEXT: v_pk_min_f16 v7, v2, v6
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
+; GFX900-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v7, 16, v7
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
+; GFX900-NEXT: v_pk_min_f16 v6, v1, v5
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v6, 16, v6
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
+; GFX900-NEXT: v_pk_min_f16 v5, v0, v4
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v5, 16, v5
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v6, s4
+; GFX900-NEXT: v_perm_b32 v1, v1, v7, s4
+; GFX900-NEXT: v_perm_b32 v2, v2, v8, s4
+; GFX900-NEXT: v_perm_b32 v3, v3, v10, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v8f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v8, v3, v7
+; GFX950-NEXT: v_mov_b32_e32 v9, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v8, 16, v8
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v7, v2, v6
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
+; GFX950-NEXT: v_perm_b32 v3, v3, v10, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v7, 16, v7
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v6, v1, v5
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
+; GFX950-NEXT: v_perm_b32 v2, v2, v8, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v6, 16, v6
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v5, v0, v4
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
+; GFX950-NEXT: v_perm_b32 v1, v1, v7, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v5, 16, v5
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v6, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v8f16:
; GFX10: ; %bb.0:
@@ -1818,147 +1767,147 @@ define <16 x half> @v_minimum_v16f16(<16 x half> %src0, <16 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v16f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v16, v7, v15
-; GFX9-NEXT: v_mov_b32_e32 v17, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v16
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
-; GFX9-NEXT: v_pk_min_f16 v15, v6, v14
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v15, 16, v15
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
-; GFX9-NEXT: v_pk_min_f16 v14, v5, v13
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v14
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
-; GFX9-NEXT: v_pk_min_f16 v13, v4, v12
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v13, 16, v13
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
-; GFX9-NEXT: v_pk_min_f16 v12, v3, v11
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v12, 16, v12
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
-; GFX9-NEXT: v_pk_min_f16 v11, v2, v10
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v11, 16, v11
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
-; GFX9-NEXT: v_pk_min_f16 v10, v1, v9
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v10
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
-; GFX9-NEXT: v_pk_min_f16 v9, v0, v8
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v9
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v10, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v11, s4
-; GFX9-NEXT: v_perm_b32 v2, v2, v12, s4
-; GFX9-NEXT: v_perm_b32 v3, v3, v13, s4
-; GFX9-NEXT: v_perm_b32 v4, v4, v14, s4
-; GFX9-NEXT: v_perm_b32 v5, v5, v15, s4
-; GFX9-NEXT: v_perm_b32 v6, v6, v16, s4
-; GFX9-NEXT: v_perm_b32 v7, v7, v18, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v16f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v16, v7, v15
-; GFX940-NEXT: v_mov_b32_e32 v17, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v16, 16, v16
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v15, v6, v14
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
-; GFX940-NEXT: v_perm_b32 v7, v7, v18, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v15, 16, v15
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v14, v5, v13
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
-; GFX940-NEXT: v_perm_b32 v6, v6, v16, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v14, 16, v14
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v13, v4, v12
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
-; GFX940-NEXT: v_perm_b32 v5, v5, v15, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v13, 16, v13
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v12, v3, v11
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
-; GFX940-NEXT: v_perm_b32 v4, v4, v14, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v12, 16, v12
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v11, v2, v10
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
-; GFX940-NEXT: v_perm_b32 v3, v3, v13, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v11, 16, v11
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v10, v1, v9
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
-; GFX940-NEXT: v_perm_b32 v2, v2, v12, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v10, 16, v10
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v9, v0, v8
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
-; GFX940-NEXT: v_perm_b32 v1, v1, v11, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v9, 16, v9
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v10, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v16f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v16, v7, v15
+; GFX900-NEXT: v_mov_b32_e32 v17, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
+; GFX900-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v16, 16, v16
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
+; GFX900-NEXT: v_pk_min_f16 v15, v6, v14
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
+; GFX900-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v15, 16, v15
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
+; GFX900-NEXT: v_pk_min_f16 v14, v5, v13
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
+; GFX900-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v14, 16, v14
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
+; GFX900-NEXT: v_pk_min_f16 v13, v4, v12
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
+; GFX900-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v13, 16, v13
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
+; GFX900-NEXT: v_pk_min_f16 v12, v3, v11
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
+; GFX900-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v12, 16, v12
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
+; GFX900-NEXT: v_pk_min_f16 v11, v2, v10
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
+; GFX900-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v11, 16, v11
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
+; GFX900-NEXT: v_pk_min_f16 v10, v1, v9
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
+; GFX900-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v10, 16, v10
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
+; GFX900-NEXT: v_pk_min_f16 v9, v0, v8
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
+; GFX900-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v9, 16, v9
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v10, s4
+; GFX900-NEXT: v_perm_b32 v1, v1, v11, s4
+; GFX900-NEXT: v_perm_b32 v2, v2, v12, s4
+; GFX900-NEXT: v_perm_b32 v3, v3, v13, s4
+; GFX900-NEXT: v_perm_b32 v4, v4, v14, s4
+; GFX900-NEXT: v_perm_b32 v5, v5, v15, s4
+; GFX900-NEXT: v_perm_b32 v6, v6, v16, s4
+; GFX900-NEXT: v_perm_b32 v7, v7, v18, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v16f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v16, v7, v15
+; GFX950-NEXT: v_mov_b32_e32 v17, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v16, 16, v16
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v15, v6, v14
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
+; GFX950-NEXT: v_perm_b32 v7, v7, v18, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v15, 16, v15
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v14, v5, v13
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
+; GFX950-NEXT: v_perm_b32 v6, v6, v16, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v14, 16, v14
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v13, v4, v12
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
+; GFX950-NEXT: v_perm_b32 v5, v5, v15, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v13, 16, v13
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v12, v3, v11
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
+; GFX950-NEXT: v_perm_b32 v4, v4, v14, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v12, 16, v12
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v11, v2, v10
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
+; GFX950-NEXT: v_perm_b32 v3, v3, v13, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v11, 16, v11
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v10, v1, v9
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
+; GFX950-NEXT: v_perm_b32 v2, v2, v12, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v10, 16, v10
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v9, v0, v8
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
+; GFX950-NEXT: v_perm_b32 v1, v1, v11, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v9, 16, v9
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v10, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v16f16:
; GFX10: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
index 2b3041290b5866..2614fb3bf9f737 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
@@ -2,7 +2,8 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
@@ -26,24 +27,24 @@ define float @v_minimum_f32(float %src0, float %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f32:
; GFX10: ; %bb.0:
@@ -94,12 +95,6 @@ define float @v_minimum_f32__nnan(float %src0, float %src1) {
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_f32__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -144,24 +139,24 @@ define float @v_minimum_f32__nsz(float %src0, float %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f32__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f32__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f32__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f32__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f32__nsz:
; GFX10: ; %bb.0:
@@ -212,12 +207,6 @@ define float @v_minimum_f32__nnan_nsz(float %src0, float %src1) {
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_f32__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -264,26 +253,26 @@ define float @v_minimum_f32__nnan_src0(float %arg0, float %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f32__nnan_src0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f32_e32 v0, 1.0, v0
-; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f32__nnan_src0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f32_e32 v0, 1.0, v0
-; GFX940-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f32__nnan_src0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX900-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f32__nnan_src0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX950-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f32__nnan_src0:
; GFX10: ; %bb.0:
@@ -341,26 +330,26 @@ define float @v_minimum_f32__nnan_src1(float %src0, float %arg1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f32__nnan_src1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1
-; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f32__nnan_src1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f32_e32 v1, 1.0, v1
-; GFX940-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f32__nnan_src1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f32_e32 v1, 1.0, v1
+; GFX900-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f32__nnan_src1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f32_e32 v1, 1.0, v1
+; GFX950-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f32__nnan_src1:
; GFX10: ; %bb.0:
@@ -424,32 +413,32 @@ define void @s_minimum_f32(float inreg %src0, float inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_minimum_f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s17
-; GFX9-NEXT: v_min_f32_e32 v1, s16, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v0
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_minimum_f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s1
-; GFX940-NEXT: v_min_f32_e32 v1, s0, v0
-; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v0
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_minimum_f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s17
+; GFX900-NEXT: v_min_f32_e32 v1, s16, v0
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v0
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_minimum_f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v0, s1
+; GFX950-NEXT: v_min_f32_e32 v1, s0, v0
+; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v0
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_minimum_f32:
; GFX10: ; %bb.0:
@@ -517,31 +506,31 @@ define <2 x float> @v_minimum_v2f32(<2 x float> %src0, <2 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v2f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v4, v0, v2
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_min_f32_e32 v2, v1, v3
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v2f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v4, v0, v2
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT: v_min_f32_e32 v2, v1, v3
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v4, v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX900-NEXT: v_min_f32_e32 v2, v1, v3
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v4, v0, v2
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX950-NEXT: v_min_f32_e32 v2, v1, v3
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f32:
; GFX10: ; %bb.0:
@@ -601,13 +590,6 @@ define <2 x float> @v_minimum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1)
; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v2f32__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX940-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v2f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -660,31 +642,31 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v2f32__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v4, v0, v2
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_min_f32_e32 v2, v1, v3
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v2f32__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v4, v0, v2
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT: v_min_f32_e32 v2, v1, v3
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f32__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v4, v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX900-NEXT: v_min_f32_e32 v2, v1, v3
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f32__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v4, v0, v2
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX950-NEXT: v_min_f32_e32 v2, v1, v3
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f32__nsz:
; GFX10: ; %bb.0:
@@ -744,13 +726,6 @@ define <2 x float> @v_minimum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr
; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v2f32__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX940-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v2f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -813,40 +788,40 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_minimum_v2f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s19
-; GFX9-NEXT: v_min_f32_e32 v1, s17, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s17, v0
-; GFX9-NEXT: v_mov_b32_e32 v0, s18
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, s16, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v[0:1]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_minimum_v2f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s3
-; GFX940-NEXT: v_min_f32_e32 v1, s1, v0
-; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s1, v0
-; GFX940-NEXT: v_mov_b32_e32 v0, s2
-; GFX940-NEXT: v_min_f32_e32 v3, s0, v0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v[0:1]
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_minimum_v2f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s19
+; GFX900-NEXT: v_min_f32_e32 v1, s17, v0
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, s17, v0
+; GFX900-NEXT: v_mov_b32_e32 v0, s18
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX900-NEXT: v_min_f32_e32 v3, s16, v0
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v[0:1]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_minimum_v2f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v0, s3
+; GFX950-NEXT: v_min_f32_e32 v1, s1, v0
+; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s1, v0
+; GFX950-NEXT: v_mov_b32_e32 v0, s2
+; GFX950-NEXT: v_min_f32_e32 v3, s0, v0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v[0:1]
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_minimum_v2f32:
; GFX10: ; %bb.0:
@@ -927,38 +902,38 @@ define <3 x float> @v_minimum_v3f32(<3 x float> %src0, <3 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v3f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v6, v0, v3
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, v1, v4
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, v2, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v3f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v6, v0, v3
-; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT: v_min_f32_e32 v3, v1, v4
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX940-NEXT: v_min_f32_e32 v3, v2, v5
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v6, v0, v3
+; GFX900-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX900-NEXT: v_min_f32_e32 v3, v1, v4
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX900-NEXT: v_min_f32_e32 v3, v2, v5
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v6, v0, v3
+; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX950-NEXT: v_min_f32_e32 v3, v1, v4
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX950-NEXT: v_min_f32_e32 v3, v2, v5
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f32:
; GFX10: ; %bb.0:
@@ -1028,14 +1003,6 @@ define <3 x float> @v_minimum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1)
; GFX9-NEXT: v_min_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v3f32__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX940-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX940-NEXT: v_min_f32_e32 v2, v2, v5
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v3f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1097,38 +1064,38 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v3f32__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v6, v0, v3
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, v1, v4
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, v2, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v3f32__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v6, v0, v3
-; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT: v_min_f32_e32 v3, v1, v4
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX940-NEXT: v_min_f32_e32 v3, v2, v5
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f32__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v6, v0, v3
+; GFX900-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX900-NEXT: v_min_f32_e32 v3, v1, v4
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX900-NEXT: v_min_f32_e32 v3, v2, v5
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f32__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v6, v0, v3
+; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX950-NEXT: v_min_f32_e32 v3, v1, v4
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX950-NEXT: v_min_f32_e32 v3, v2, v5
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f32__nsz:
; GFX10: ; %bb.0:
@@ -1198,14 +1165,6 @@ define <3 x float> @v_minimum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr
; GFX9-NEXT: v_min_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v3f32__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX940-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX940-NEXT: v_min_f32_e32 v2, v2, v5
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v3f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1273,45 +1232,45 @@ define <4 x float> @v_minimum_v4f32(<4 x float> %src0, <4 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v4f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v8, v0, v4
-; GFX9-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX9-NEXT: v_min_f32_e32 v4, v1, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX9-NEXT: v_min_f32_e32 v4, v2, v6
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX9-NEXT: v_min_f32_e32 v4, v3, v7
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v4f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v8, v0, v4
-; GFX940-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX940-NEXT: v_min_f32_e32 v4, v1, v5
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX940-NEXT: v_min_f32_e32 v4, v2, v6
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX940-NEXT: v_min_f32_e32 v4, v3, v7
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v8, v0, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX900-NEXT: v_min_f32_e32 v4, v1, v5
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX900-NEXT: v_min_f32_e32 v4, v2, v6
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX900-NEXT: v_min_f32_e32 v4, v3, v7
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v8, v0, v4
+; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX950-NEXT: v_min_f32_e32 v4, v1, v5
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX950-NEXT: v_min_f32_e32 v4, v2, v6
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX950-NEXT: v_min_f32_e32 v4, v3, v7
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f32:
; GFX10: ; %bb.0:
@@ -1391,15 +1350,6 @@ define <4 x float> @v_minimum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1)
; GFX9-NEXT: v_min_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v4f32__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX940-NEXT: v_min_f32_e32 v1, v1, v5
-; GFX940-NEXT: v_min_f32_e32 v2, v2, v6
-; GFX940-NEXT: v_min_f32_e32 v3, v3, v7
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v4f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1469,45 +1419,45 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v4f32__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v8, v0, v4
-; GFX9-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX9-NEXT: v_min_f32_e32 v4, v1, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX9-NEXT: v_min_f32_e32 v4, v2, v6
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX9-NEXT: v_min_f32_e32 v4, v3, v7
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v4f32__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v8, v0, v4
-; GFX940-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX940-NEXT: v_min_f32_e32 v4, v1, v5
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX940-NEXT: v_min_f32_e32 v4, v2, v6
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX940-NEXT: v_min_f32_e32 v4, v3, v7
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f32__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v8, v0, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX900-NEXT: v_min_f32_e32 v4, v1, v5
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX900-NEXT: v_min_f32_e32 v4, v2, v6
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX900-NEXT: v_min_f32_e32 v4, v3, v7
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f32__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v8, v0, v4
+; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX950-NEXT: v_min_f32_e32 v4, v1, v5
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX950-NEXT: v_min_f32_e32 v4, v2, v6
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX950-NEXT: v_min_f32_e32 v4, v3, v7
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f32__nsz:
; GFX10: ; %bb.0:
@@ -1587,15 +1537,6 @@ define <4 x float> @v_minimum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr
; GFX9-NEXT: v_min_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v4f32__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX940-NEXT: v_min_f32_e32 v1, v1, v5
-; GFX940-NEXT: v_min_f32_e32 v2, v2, v6
-; GFX940-NEXT: v_min_f32_e32 v3, v3, v7
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v4f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1689,73 +1630,73 @@ define <8 x float> @v_minimum_v8f32(<8 x float> %src0, <8 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v8f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v16, v0, v8
-; GFX9-NEXT: v_mov_b32_e32 v17, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX9-NEXT: v_min_f32_e32 v8, v1, v9
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
-; GFX9-NEXT: v_min_f32_e32 v8, v2, v10
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
-; GFX9-NEXT: v_min_f32_e32 v8, v3, v11
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
-; GFX9-NEXT: v_min_f32_e32 v8, v4, v12
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
-; GFX9-NEXT: v_min_f32_e32 v8, v5, v13
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
-; GFX9-NEXT: v_min_f32_e32 v8, v6, v14
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
-; GFX9-NEXT: v_min_f32_e32 v8, v7, v15
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v8f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v16, v0, v8
-; GFX940-NEXT: v_mov_b32_e32 v17, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX940-NEXT: v_min_f32_e32 v8, v1, v9
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
-; GFX940-NEXT: v_min_f32_e32 v8, v2, v10
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
-; GFX940-NEXT: v_min_f32_e32 v8, v3, v11
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
-; GFX940-NEXT: v_min_f32_e32 v8, v4, v12
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
-; GFX940-NEXT: v_min_f32_e32 v8, v5, v13
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
-; GFX940-NEXT: v_min_f32_e32 v8, v6, v14
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
-; GFX940-NEXT: v_min_f32_e32 v8, v7, v15
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v8f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v16, v0, v8
+; GFX900-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX900-NEXT: v_min_f32_e32 v8, v1, v9
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX900-NEXT: v_min_f32_e32 v8, v2, v10
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX900-NEXT: v_min_f32_e32 v8, v3, v11
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX900-NEXT: v_min_f32_e32 v8, v4, v12
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX900-NEXT: v_min_f32_e32 v8, v5, v13
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
+; GFX900-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX900-NEXT: v_min_f32_e32 v8, v6, v14
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX900-NEXT: v_min_f32_e32 v8, v7, v15
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
+; GFX900-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v8f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v16, v0, v8
+; GFX950-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
+; GFX950-NEXT: v_min_f32_e32 v8, v1, v9
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX950-NEXT: v_min_f32_e32 v8, v2, v10
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX950-NEXT: v_min_f32_e32 v8, v3, v11
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX950-NEXT: v_min_f32_e32 v8, v4, v12
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX950-NEXT: v_min_f32_e32 v8, v5, v13
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX950-NEXT: v_min_f32_e32 v8, v6, v14
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX950-NEXT: v_min_f32_e32 v8, v7, v15
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v8f32:
; GFX10: ; %bb.0:
@@ -1968,136 +1909,136 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v16f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v16
-; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32
-; GFX9-NEXT: v_writelane_b32 v31, s30, 0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX9-NEXT: v_min_f32_e32 v1, v1, v17
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
-; GFX9-NEXT: v_min_f32_e32 v2, v2, v18
-; GFX9-NEXT: v_mov_b32_e32 v17, 0x7fc00000
-; GFX9-NEXT: v_min_f32_e32 v18, v13, v29
-; GFX9-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
-; GFX9-NEXT: v_writelane_b32 v31, s31, 1
-; GFX9-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
-; GFX9-NEXT: v_min_f32_e32 v3, v3, v19
-; GFX9-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
-; GFX9-NEXT: v_min_f32_e32 v4, v4, v20
-; GFX9-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
-; GFX9-NEXT: v_min_f32_e32 v5, v5, v21
-; GFX9-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
-; GFX9-NEXT: v_min_f32_e32 v6, v6, v22
-; GFX9-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
-; GFX9-NEXT: v_min_f32_e32 v7, v7, v23
-; GFX9-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
-; GFX9-NEXT: v_min_f32_e32 v8, v8, v24
-; GFX9-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
-; GFX9-NEXT: v_min_f32_e32 v9, v9, v25
-; GFX9-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
-; GFX9-NEXT: v_min_f32_e32 v10, v10, v26
-; GFX9-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
-; GFX9-NEXT: v_min_f32_e32 v11, v11, v27
-; GFX9-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
-; GFX9-NEXT: v_min_f32_e32 v12, v12, v28
-; GFX9-NEXT: v_min_f32_e32 v19, v14, v30
-; GFX9-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
-; GFX9-NEXT: v_readlane_b32 s31, v31, 1
-; GFX9-NEXT: v_readlane_b32 s30, v31, 0
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v18, v15, v16
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v16f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: v_mov_b32_e32 v32, 0x7fc00000
-; GFX940-NEXT: v_min_f32_e32 v33, v0, v16
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX940-NEXT: v_min_f32_e32 v34, v1, v17
-; GFX940-NEXT: v_min_f32_e32 v35, v2, v18
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX940-NEXT: v_min_f32_e32 v36, v3, v19
-; GFX940-NEXT: v_min_f32_e32 v37, v4, v20
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX940-NEXT: v_min_f32_e32 v38, v5, v21
-; GFX940-NEXT: v_min_f32_e32 v39, v6, v22
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX940-NEXT: v_min_f32_e32 v48, v7, v23
-; GFX940-NEXT: v_min_f32_e32 v49, v8, v24
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX940-NEXT: v_min_f32_e32 v50, v9, v25
-; GFX940-NEXT: v_min_f32_e32 v51, v10, v26
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX940-NEXT: v_min_f32_e32 v52, v11, v27
-; GFX940-NEXT: v_min_f32_e32 v53, v12, v28
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX940-NEXT: v_min_f32_e32 v54, v13, v29
-; GFX940-NEXT: v_min_f32_e32 v55, v14, v30
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v16, v15, v31
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v15, v31
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v16f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v16
+; GFX900-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX900-NEXT: v_writelane_b32 v31, s30, 0
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
+; GFX900-NEXT: v_min_f32_e32 v1, v1, v17
+; GFX900-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
+; GFX900-NEXT: v_min_f32_e32 v2, v2, v18
+; GFX900-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX900-NEXT: v_min_f32_e32 v18, v13, v29
+; GFX900-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
+; GFX900-NEXT: v_writelane_b32 v31, s31, 1
+; GFX900-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
+; GFX900-NEXT: v_min_f32_e32 v3, v3, v19
+; GFX900-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
+; GFX900-NEXT: v_min_f32_e32 v4, v4, v20
+; GFX900-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
+; GFX900-NEXT: v_min_f32_e32 v5, v5, v21
+; GFX900-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
+; GFX900-NEXT: v_min_f32_e32 v6, v6, v22
+; GFX900-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
+; GFX900-NEXT: v_min_f32_e32 v7, v7, v23
+; GFX900-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
+; GFX900-NEXT: v_min_f32_e32 v8, v8, v24
+; GFX900-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
+; GFX900-NEXT: v_min_f32_e32 v9, v9, v25
+; GFX900-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
+; GFX900-NEXT: v_min_f32_e32 v10, v10, v26
+; GFX900-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
+; GFX900-NEXT: v_min_f32_e32 v11, v11, v27
+; GFX900-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
+; GFX900-NEXT: v_min_f32_e32 v12, v12, v28
+; GFX900-NEXT: v_min_f32_e32 v19, v14, v30
+; GFX900-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
+; GFX900-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
+; GFX900-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
+; GFX900-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
+; GFX900-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
+; GFX900-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
+; GFX900-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
+; GFX900-NEXT: v_readlane_b32 s31, v31, 1
+; GFX900-NEXT: v_readlane_b32 s30, v31, 0
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v18, v15, v16
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
+; GFX900-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v16f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: scratch_load_dword v31, off, s32
+; GFX950-NEXT: v_mov_b32_e32 v32, 0x7fc00000
+; GFX950-NEXT: v_min_f32_e32 v33, v0, v16
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
+; GFX950-NEXT: v_min_f32_e32 v34, v1, v17
+; GFX950-NEXT: v_min_f32_e32 v35, v2, v18
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
+; GFX950-NEXT: v_min_f32_e32 v36, v3, v19
+; GFX950-NEXT: v_min_f32_e32 v37, v4, v20
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
+; GFX950-NEXT: v_min_f32_e32 v38, v5, v21
+; GFX950-NEXT: v_min_f32_e32 v39, v6, v22
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
+; GFX950-NEXT: v_min_f32_e32 v48, v7, v23
+; GFX950-NEXT: v_min_f32_e32 v49, v8, v24
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
+; GFX950-NEXT: v_min_f32_e32 v50, v9, v25
+; GFX950-NEXT: v_min_f32_e32 v51, v10, v26
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
+; GFX950-NEXT: v_min_f32_e32 v52, v11, v27
+; GFX950-NEXT: v_min_f32_e32 v53, v12, v28
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
+; GFX950-NEXT: v_min_f32_e32 v54, v13, v29
+; GFX950-NEXT: v_min_f32_e32 v55, v14, v30
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v16, v15, v31
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v15, v31
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v16f32:
; GFX10: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
index 567582c9f58ff2..71fdd691a15122 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
@@ -2,7 +2,8 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
@@ -28,26 +29,26 @@ define double @v_minimum_f64(double %src0, double %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f64:
; GFX10: ; %bb.0:
@@ -100,12 +101,6 @@ define double @v_minimum_f64__nnan(double %src0, double %src1) {
; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_f64__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -152,26 +147,26 @@ define double @v_minimum_f64__nsz(double %src0, double %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f64__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f64__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f64__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f64__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f64__nsz:
; GFX10: ; %bb.0:
@@ -224,12 +219,6 @@ define double @v_minimum_f64__nnan_nsz(double %src0, double %src1) {
; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_f64__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -278,28 +267,28 @@ define double @v_minimum_f64__nnan_src0(double %arg0, double %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f64__nnan_src0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
-; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f64__nnan_src0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
-; GFX940-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f64__nnan_src0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
+; GFX900-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f64__nnan_src0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
+; GFX950-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f64__nnan_src0:
; GFX10: ; %bb.0:
@@ -362,28 +351,28 @@ define double @v_minimum_f64__nnan_src1(double %src0, double %arg1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f64__nnan_src1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
-; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f64__nnan_src1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
-; GFX940-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f64__nnan_src1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
+; GFX900-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f64__nnan_src1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
+; GFX950-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f64__nnan_src1:
; GFX10: ; %bb.0:
@@ -454,35 +443,35 @@ define void @s_minimum_f64(double inreg %src0, double inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_minimum_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s18
-; GFX9-NEXT: v_mov_b32_e32 v1, s19
-; GFX9-NEXT: v_min_f64 v[2:3], s[16:17], v[0:1]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v[0:1]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_minimum_f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
-; GFX940-NEXT: v_min_f64 v[2:3], s[0:1], v[0:1]
-; GFX940-NEXT: v_mov_b32_e32 v4, 0x7ff80000
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v[0:1]
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_minimum_f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s18
+; GFX900-NEXT: v_mov_b32_e32 v1, s19
+; GFX900-NEXT: v_min_f64 v[2:3], s[16:17], v[0:1]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
+; GFX900-NEXT: v_mov_b32_e32 v4, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v[0:1]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_minimum_f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX950-NEXT: v_min_f64 v[2:3], s[0:1], v[0:1]
+; GFX950-NEXT: v_mov_b32_e32 v4, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v[0:1]
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_minimum_f64:
; GFX10: ; %bb.0:
@@ -555,35 +544,35 @@ define <2 x double> @v_minimum_v2f64(<2 x double> %src0, <2 x double> %src1) {
; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v2f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v2f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX900-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64:
; GFX10: ; %bb.0:
@@ -648,13 +637,6 @@ define <2 x double> @v_minimum_v2f64__nnan(<2 x double> %src0, <2 x double> %src
; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v2f64__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v2f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -712,35 +694,35 @@ define <2 x double> @v_minimum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v2f64__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v2f64__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f64__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX900-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f64__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64__nsz:
; GFX10: ; %bb.0:
@@ -805,13 +787,6 @@ define <2 x double> @v_minimum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double>
; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v2f64__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -883,46 +858,46 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_minimum_v2f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s22
-; GFX9-NEXT: v_mov_b32_e32 v4, s20
-; GFX9-NEXT: v_mov_b32_e32 v1, s23
-; GFX9-NEXT: v_mov_b32_e32 v5, s21
-; GFX9-NEXT: v_min_f64 v[2:3], s[18:19], v[0:1]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[18:19], v[0:1]
-; GFX9-NEXT: v_min_f64 v[0:1], s[16:17], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], s[16:17], v[4:5]
-; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v[0:3]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_minimum_v2f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[18:19]
-; GFX940-NEXT: v_min_f64 v[2:3], s[2:3], v[0:1]
-; GFX940-NEXT: v_mov_b32_e32 v6, 0x7ff80000
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[2:3], v[0:1]
-; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[16:17]
-; GFX940-NEXT: v_min_f64 v[4:5], s[0:1], v[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v[0:3]
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_minimum_v2f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s22
+; GFX900-NEXT: v_mov_b32_e32 v4, s20
+; GFX900-NEXT: v_mov_b32_e32 v1, s23
+; GFX900-NEXT: v_mov_b32_e32 v5, s21
+; GFX900-NEXT: v_min_f64 v[2:3], s[18:19], v[0:1]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, s[18:19], v[0:1]
+; GFX900-NEXT: v_min_f64 v[0:1], s[16:17], v[4:5]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], s[16:17], v[4:5]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_minimum_v2f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[18:19]
+; GFX950-NEXT: v_min_f64 v[2:3], s[2:3], v[0:1]
+; GFX950-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[2:3], v[0:1]
+; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[16:17]
+; GFX950-NEXT: v_min_f64 v[4:5], s[0:1], v[0:1]
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v[0:3]
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_minimum_v2f64:
; GFX10: ; %bb.0:
@@ -1012,44 +987,44 @@ define <3 x double> @v_minimum_v3f64(<3 x double> %src0, <3 x double> %src1) {
; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v3f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX9-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v3f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v12, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX940-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX900-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX900-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX950-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX950-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64:
; GFX10: ; %bb.0:
@@ -1125,14 +1100,6 @@ define <3 x double> @v_minimum_v3f64__nnan(<3 x double> %src0, <3 x double> %src
; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v3f64__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX940-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v3f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1201,44 +1168,44 @@ define <3 x double> @v_minimum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v3f64__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX9-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v3f64__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v12, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX940-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f64__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX900-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX900-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f64__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX950-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX950-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64__nsz:
; GFX10: ; %bb.0:
@@ -1314,14 +1281,6 @@ define <3 x double> @v_minimum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double>
; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v3f64__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX940-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1398,53 +1357,53 @@ define <4 x double> @v_minimum_v4f64(<4 x double> %src0, <4 x double> %src1) {
; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v4f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX9-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX9-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v4f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v16, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX940-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX940-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX900-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX900-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX900-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX900-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX950-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX950-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX950-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64:
; GFX10: ; %bb.0:
@@ -1532,15 +1491,6 @@ define <4 x double> @v_minimum_v4f64__nnan(<4 x double> %src0, <4 x double> %src
; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v4f64__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
-; GFX940-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
-; GFX940-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v4f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1620,53 +1570,53 @@ define <4 x double> @v_minimum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v4f64__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX9-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX9-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v4f64__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v16, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX940-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX940-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f64__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX900-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX900-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX900-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX900-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f64__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX950-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX950-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX950-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64__nsz:
; GFX10: ; %bb.0:
@@ -1754,15 +1704,6 @@ define <4 x double> @v_minimum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double>
; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v4f64__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
-; GFX940-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
-; GFX940-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1878,89 +1819,89 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v8f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX9-NEXT: v_min_f64 v[32:33], v[2:3], v[18:19]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
-; GFX9-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
-; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[16:17]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
-; GFX9-NEXT: v_mov_b32_e32 v34, 0x7ff80000
-; GFX9-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
-; GFX9-NEXT: v_min_f64 v[16:17], v[8:9], v[24:25]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
-; GFX9-NEXT: v_min_f64 v[22:23], v[10:11], v[26:27]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
-; GFX9-NEXT: v_min_f64 v[24:25], v[12:13], v[28:29]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_min_f64 v[18:19], v[14:15], v[30:31]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v8f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: v_mov_b32_e32 v54, 0x7ff80000
-; GFX940-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX940-NEXT: v_min_f64 v[34:35], v[2:3], v[18:19]
-; GFX940-NEXT: v_min_f64 v[36:37], v[4:5], v[20:21]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
-; GFX940-NEXT: v_min_f64 v[38:39], v[6:7], v[22:23]
-; GFX940-NEXT: v_min_f64 v[48:49], v[8:9], v[24:25]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
-; GFX940-NEXT: v_min_f64 v[50:51], v[10:11], v[26:27]
-; GFX940-NEXT: v_min_f64 v[52:53], v[12:13], v[28:29]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_min_f64 v[16:17], v[14:15], v[30:31]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v8f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX900-NEXT: v_min_f64 v[32:33], v[2:3], v[18:19]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX900-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
+; GFX900-NEXT: v_min_f64 v[2:3], v[0:1], v[16:17]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v34, 0x7ff80000
+; GFX900-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
+; GFX900-NEXT: v_min_f64 v[16:17], v[8:9], v[24:25]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX900-NEXT: v_min_f64 v[22:23], v[10:11], v[26:27]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX900-NEXT: v_min_f64 v[24:25], v[12:13], v[28:29]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
+; GFX900-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
+; GFX900-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
+; GFX900-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
+; GFX900-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
+; GFX900-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_min_f64 v[18:19], v[14:15], v[30:31]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v8f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: scratch_load_dword v31, off, s32
+; GFX950-NEXT: v_mov_b32_e32 v54, 0x7ff80000
+; GFX950-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX950-NEXT: v_min_f64 v[34:35], v[2:3], v[18:19]
+; GFX950-NEXT: v_min_f64 v[36:37], v[4:5], v[20:21]
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX950-NEXT: v_min_f64 v[38:39], v[6:7], v[22:23]
+; GFX950-NEXT: v_min_f64 v[48:49], v[8:9], v[24:25]
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
+; GFX950-NEXT: v_min_f64 v[50:51], v[10:11], v[26:27]
+; GFX950-NEXT: v_min_f64 v[52:53], v[12:13], v[28:29]
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_min_f64 v[16:17], v[14:15], v[30:31]
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v8f64:
; GFX10: ; %bb.0:
@@ -2332,295 +2273,295 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v16f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX9-NEXT: v_writelane_b32 v34, s30, 0
-; GFX9-NEXT: v_writelane_b32 v34, s31, 1
-; GFX9-NEXT: v_writelane_b32 v34, s34, 2
-; GFX9-NEXT: v_writelane_b32 v34, s35, 3
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
-; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
-; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[31:32]
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
-; GFX9-NEXT: v_min_f64 v[8:9], v[8:9], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
-; GFX9-NEXT: v_min_f64 v[10:11], v[10:11], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
-; GFX9-NEXT: v_min_f64 v[12:13], v[12:13], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
-; GFX9-NEXT: v_min_f64 v[14:15], v[14:15], v[31:32]
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
-; GFX9-NEXT: v_min_f64 v[16:17], v[16:17], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
-; GFX9-NEXT: v_min_f64 v[18:19], v[18:19], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX9-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
-; GFX9-NEXT: v_min_f64 v[20:21], v[20:21], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
-; GFX9-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
-; GFX9-NEXT: v_min_f64 v[22:23], v[22:23], v[31:32]
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
-; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
-; GFX9-NEXT: v_min_f64 v[24:25], v[24:25], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
-; GFX9-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
-; GFX9-NEXT: v_min_f64 v[26:27], v[26:27], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
-; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
-; GFX9-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32]
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
-; GFX9-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33]
-; GFX9-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
-; GFX9-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
-; GFX9-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
-; GFX9-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
-; GFX9-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX9-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
-; GFX9-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
-; GFX9-NEXT: v_readlane_b32 s35, v34, 3
-; GFX9-NEXT: v_readlane_b32 s34, v34, 2
-; GFX9-NEXT: v_readlane_b32 s31, v34, 1
-; GFX9-NEXT: v_readlane_b32 s30, v34, 0
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v16f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_accvgpr_write_b32 a1, v40 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a2, v41 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a3, v42 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a4, v43 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a5, v44 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a6, v45 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a7, v46 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a8, v47 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a9, v56 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a10, v57 ; Reload Reuse
-; GFX940-NEXT: scratch_load_dword v37, off, s32 offset:16
-; GFX940-NEXT: scratch_load_dword v36, off, s32 offset:12
-; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:24
-; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:20
-; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:32
-; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:28
-; GFX940-NEXT: scratch_load_dword v57, off, s32 offset:8
-; GFX940-NEXT: scratch_load_dword v56, off, s32 offset:4
-; GFX940-NEXT: scratch_load_dword v47, off, s32 offset:40
-; GFX940-NEXT: scratch_load_dword v46, off, s32 offset:36
-; GFX940-NEXT: scratch_load_dword v45, off, s32 offset:48
-; GFX940-NEXT: scratch_load_dword v44, off, s32 offset:44
-; GFX940-NEXT: scratch_load_dword v43, off, s32 offset:56
-; GFX940-NEXT: scratch_load_dword v42, off, s32 offset:52
-; GFX940-NEXT: scratch_load_dword v41, off, s32 offset:64
-; GFX940-NEXT: scratch_load_dword v40, off, s32 offset:60
-; GFX940-NEXT: scratch_load_dword v55, off, s32 offset:72
-; GFX940-NEXT: scratch_load_dword v54, off, s32 offset:68
-; GFX940-NEXT: scratch_load_dword v53, off, s32 offset:80
-; GFX940-NEXT: scratch_load_dword v52, off, s32 offset:76
-; GFX940-NEXT: scratch_load_dword v51, off, s32 offset:88
-; GFX940-NEXT: scratch_load_dword v50, off, s32 offset:84
-; GFX940-NEXT: scratch_load_dword v35, off, s32 offset:96
-; GFX940-NEXT: scratch_load_dword v34, off, s32 offset:92
-; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: scratch_load_dword v33, off, s32 offset:104
-; GFX940-NEXT: scratch_load_dword v32, off, s32 offset:100
-; GFX940-NEXT: v_accvgpr_write_b32 a11, v58 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a12, v59 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a13, v60 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a14, v61 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a15, v62 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a16, v63 ; Reload Reuse
-; GFX940-NEXT: s_waitcnt vmcnt(25)
-; GFX940-NEXT: v_min_f64 v[58:59], v[2:3], v[36:37]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[36:37]
-; GFX940-NEXT: scratch_load_dword v37, off, s32 offset:112
-; GFX940-NEXT: scratch_load_dword v36, off, s32 offset:108
-; GFX940-NEXT: s_waitcnt vmcnt(25)
-; GFX940-NEXT: v_min_f64 v[60:61], v[4:5], v[38:39]
-; GFX940-NEXT: v_cmp_u_f64_e64 s[0:1], v[4:5], v[38:39]
-; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:120
-; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:116
-; GFX940-NEXT: s_waitcnt vmcnt(25)
-; GFX940-NEXT: v_min_f64 v[62:63], v[6:7], v[48:49]
-; GFX940-NEXT: v_cmp_u_f64_e64 s[2:3], v[6:7], v[48:49]
-; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:128
-; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:124
-; GFX940-NEXT: s_waitcnt vmcnt(25)
-; GFX940-NEXT: v_min_f64 v[2:3], v[0:1], v[56:57]
-; GFX940-NEXT: v_cmp_u_f64_e64 s[4:5], v[0:1], v[56:57]
-; GFX940-NEXT: v_mov_b32_e32 v0, 0x7ff80000
-; GFX940-NEXT: s_waitcnt vmcnt(23)
-; GFX940-NEXT: v_min_f64 v[56:57], v[8:9], v[46:47]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v2, 0, s[4:5]
-; GFX940-NEXT: v_accvgpr_write_b32 a0, v1
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v3, v0, s[4:5]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v58, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v59, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
-; GFX940-NEXT: s_waitcnt vmcnt(21)
-; GFX940-NEXT: v_min_f64 v[46:47], v[10:11], v[44:45]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v60, 0, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v57, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
-; GFX940-NEXT: s_waitcnt vmcnt(19)
-; GFX940-NEXT: v_min_f64 v[44:45], v[12:13], v[42:43]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v61, v0, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v46, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v47, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
-; GFX940-NEXT: s_waitcnt vmcnt(17)
-; GFX940-NEXT: v_min_f64 v[42:43], v[14:15], v[40:41]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v62, 0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v44, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v45, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
-; GFX940-NEXT: s_waitcnt vmcnt(15)
-; GFX940-NEXT: v_min_f64 v[40:41], v[16:17], v[54:55]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v63, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v42, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v43, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
-; GFX940-NEXT: s_waitcnt vmcnt(13)
-; GFX940-NEXT: v_min_f64 v[54:55], v[18:19], v[52:53]
-; GFX940-NEXT: v_accvgpr_read_b32 v63, a16 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v16, v40, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v17, v41, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
-; GFX940-NEXT: s_waitcnt vmcnt(11)
-; GFX940-NEXT: v_min_f64 v[52:53], v[20:21], v[50:51]
-; GFX940-NEXT: v_accvgpr_read_b32 v62, a15 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v18, v54, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v19, v55, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
-; GFX940-NEXT: s_waitcnt vmcnt(9)
-; GFX940-NEXT: v_min_f64 v[50:51], v[22:23], v[34:35]
-; GFX940-NEXT: v_accvgpr_read_b32 v61, a14 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v20, v52, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v21, v53, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[34:35]
-; GFX940-NEXT: s_waitcnt vmcnt(6)
-; GFX940-NEXT: v_min_f64 v[34:35], v[24:25], v[32:33]
-; GFX940-NEXT: v_accvgpr_read_b32 v60, a13 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v22, v50, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v23, v51, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[32:33]
-; GFX940-NEXT: v_accvgpr_read_b32 v59, a12 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v58, a11 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v24, v34, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v25, v35, v0, vcc
-; GFX940-NEXT: v_accvgpr_read_b32 v57, a10 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v56, a9 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v47, a8 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v46, a7 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v45, a6 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v44, a5 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v43, a4 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v42, a3 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v41, a2 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v40, a1 ; Reload Reuse
-; GFX940-NEXT: s_waitcnt vmcnt(4)
-; GFX940-NEXT: v_min_f64 v[32:33], v[26:27], v[36:37]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[36:37]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v26, v32, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v27, v33, v0, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(2)
-; GFX940-NEXT: v_min_f64 v[32:33], v[28:29], v[38:39]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[38:39]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v28, v32, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v29, v33, v0, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_min_f64 v[32:33], v[30:31], v[48:49]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[48:49]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v30, v32, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v31, v33, v0, vcc
-; GFX940-NEXT: v_accvgpr_read_b32 v0, a0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v16f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
+; GFX900-NEXT: v_writelane_b32 v34, s30, 0
+; GFX900-NEXT: v_writelane_b32 v34, s31, 1
+; GFX900-NEXT: v_writelane_b32 v34, s34, 2
+; GFX900-NEXT: v_writelane_b32 v34, s35, 3
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX900-NEXT: v_min_f64 v[6:7], v[6:7], v[31:32]
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX900-NEXT: v_min_f64 v[8:9], v[8:9], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX900-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX900-NEXT: v_min_f64 v[10:11], v[10:11], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
+; GFX900-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX900-NEXT: v_min_f64 v[12:13], v[12:13], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX900-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX900-NEXT: v_min_f64 v[14:15], v[14:15], v[31:32]
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
+; GFX900-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX900-NEXT: v_min_f64 v[16:17], v[16:17], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX900-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX900-NEXT: v_min_f64 v[18:19], v[18:19], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
+; GFX900-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX900-NEXT: v_min_f64 v[20:21], v[20:21], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX900-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX900-NEXT: v_min_f64 v[22:23], v[22:23], v[31:32]
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
+; GFX900-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX900-NEXT: v_min_f64 v[24:25], v[24:25], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX900-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX900-NEXT: v_min_f64 v[26:27], v[26:27], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX900-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX900-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32]
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX900-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
+; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX900-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33]
+; GFX900-NEXT: v_mov_b32_e32 v32, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
+; GFX900-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
+; GFX900-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
+; GFX900-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
+; GFX900-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
+; GFX900-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
+; GFX900-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
+; GFX900-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
+; GFX900-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
+; GFX900-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
+; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
+; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
+; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
+; GFX900-NEXT: v_readlane_b32 s35, v34, 3
+; GFX900-NEXT: v_readlane_b32 s34, v34, 2
+; GFX900-NEXT: v_readlane_b32 s31, v34, 1
+; GFX900-NEXT: v_readlane_b32 s30, v34, 0
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v16f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_accvgpr_write_b32 a1, v40 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a2, v41 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a3, v42 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a4, v43 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a5, v44 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a6, v45 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a7, v46 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a8, v47 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a9, v56 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a10, v57 ; Reload Reuse
+; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:16
+; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:12
+; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:24
+; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:20
+; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:32
+; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:28
+; GFX950-NEXT: scratch_load_dword v57, off, s32 offset:8
+; GFX950-NEXT: scratch_load_dword v56, off, s32 offset:4
+; GFX950-NEXT: scratch_load_dword v47, off, s32 offset:40
+; GFX950-NEXT: scratch_load_dword v46, off, s32 offset:36
+; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:48
+; GFX950-NEXT: scratch_load_dword v44, off, s32 offset:44
+; GFX950-NEXT: scratch_load_dword v43, off, s32 offset:56
+; GFX950-NEXT: scratch_load_dword v42, off, s32 offset:52
+; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:64
+; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:60
+; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:72
+; GFX950-NEXT: scratch_load_dword v54, off, s32 offset:68
+; GFX950-NEXT: scratch_load_dword v53, off, s32 offset:80
+; GFX950-NEXT: scratch_load_dword v52, off, s32 offset:76
+; GFX950-NEXT: scratch_load_dword v51, off, s32 offset:88
+; GFX950-NEXT: scratch_load_dword v50, off, s32 offset:84
+; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:96
+; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:92
+; GFX950-NEXT: scratch_load_dword v31, off, s32
+; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:104
+; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:100
+; GFX950-NEXT: v_accvgpr_write_b32 a11, v58 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a12, v59 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a13, v60 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a14, v61 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a15, v62 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a16, v63 ; Reload Reuse
+; GFX950-NEXT: s_waitcnt vmcnt(25)
+; GFX950-NEXT: v_min_f64 v[58:59], v[2:3], v[36:37]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[36:37]
+; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:112
+; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:108
+; GFX950-NEXT: s_waitcnt vmcnt(25)
+; GFX950-NEXT: v_min_f64 v[60:61], v[4:5], v[38:39]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[0:1], v[4:5], v[38:39]
+; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:120
+; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:116
+; GFX950-NEXT: s_waitcnt vmcnt(25)
+; GFX950-NEXT: v_min_f64 v[62:63], v[6:7], v[48:49]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[2:3], v[6:7], v[48:49]
+; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:128
+; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:124
+; GFX950-NEXT: s_waitcnt vmcnt(25)
+; GFX950-NEXT: v_min_f64 v[2:3], v[0:1], v[56:57]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[4:5], v[0:1], v[56:57]
+; GFX950-NEXT: v_mov_b32_e32 v0, 0x7ff80000
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_min_f64 v[56:57], v[8:9], v[46:47]
+; GFX950-NEXT: v_cndmask_b32_e64 v1, v2, 0, s[4:5]
+; GFX950-NEXT: v_accvgpr_write_b32 a0, v1
+; GFX950-NEXT: v_cndmask_b32_e64 v1, v3, v0, s[4:5]
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v58, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v59, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
+; GFX950-NEXT: s_waitcnt vmcnt(21)
+; GFX950-NEXT: v_min_f64 v[46:47], v[10:11], v[44:45]
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v60, 0, s[0:1]
+; GFX950-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v57, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
+; GFX950-NEXT: s_waitcnt vmcnt(19)
+; GFX950-NEXT: v_min_f64 v[44:45], v[12:13], v[42:43]
+; GFX950-NEXT: v_cndmask_b32_e64 v5, v61, v0, s[0:1]
+; GFX950-NEXT: v_cndmask_b32_e64 v10, v46, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v47, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
+; GFX950-NEXT: s_waitcnt vmcnt(17)
+; GFX950-NEXT: v_min_f64 v[42:43], v[14:15], v[40:41]
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v62, 0, s[2:3]
+; GFX950-NEXT: v_cndmask_b32_e64 v12, v44, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v45, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
+; GFX950-NEXT: s_waitcnt vmcnt(15)
+; GFX950-NEXT: v_min_f64 v[40:41], v[16:17], v[54:55]
+; GFX950-NEXT: v_cndmask_b32_e64 v7, v63, v0, s[2:3]
+; GFX950-NEXT: v_cndmask_b32_e64 v14, v42, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v43, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
+; GFX950-NEXT: s_waitcnt vmcnt(13)
+; GFX950-NEXT: v_min_f64 v[54:55], v[18:19], v[52:53]
+; GFX950-NEXT: v_accvgpr_read_b32 v63, a16 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v16, v40, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v17, v41, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
+; GFX950-NEXT: s_waitcnt vmcnt(11)
+; GFX950-NEXT: v_min_f64 v[52:53], v[20:21], v[50:51]
+; GFX950-NEXT: v_accvgpr_read_b32 v62, a15 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v18, v54, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v19, v55, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
+; GFX950-NEXT: s_waitcnt vmcnt(9)
+; GFX950-NEXT: v_min_f64 v[50:51], v[22:23], v[34:35]
+; GFX950-NEXT: v_accvgpr_read_b32 v61, a14 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v20, v52, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v21, v53, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[34:35]
+; GFX950-NEXT: s_waitcnt vmcnt(6)
+; GFX950-NEXT: v_min_f64 v[34:35], v[24:25], v[32:33]
+; GFX950-NEXT: v_accvgpr_read_b32 v60, a13 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v22, v50, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v23, v51, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[32:33]
+; GFX950-NEXT: v_accvgpr_read_b32 v59, a12 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v58, a11 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v24, v34, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v25, v35, v0, vcc
+; GFX950-NEXT: v_accvgpr_read_b32 v57, a10 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v56, a9 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v47, a8 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v46, a7 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v45, a6 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v44, a5 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v43, a4 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v42, a3 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v41, a2 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v40, a1 ; Reload Reuse
+; GFX950-NEXT: s_waitcnt vmcnt(4)
+; GFX950-NEXT: v_min_f64 v[32:33], v[26:27], v[36:37]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[36:37]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v26, v32, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v27, v33, v0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(2)
+; GFX950-NEXT: v_min_f64 v[32:33], v[28:29], v[38:39]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[38:39]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v28, v32, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v29, v33, v0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_min_f64 v[32:33], v[30:31], v[48:49]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[48:49]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v30, v32, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v31, v33, v0, vcc
+; GFX950-NEXT: v_accvgpr_read_b32 v0, a0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v16f64:
; GFX10: ; %bb.0:
diff --git a/llvm/test/MC/AMDGPU/flat-scratch-gfx940.s b/llvm/test/MC/AMDGPU/flat-scratch-gfx940.s
index fde3d2057b2ad1..d3ca4281dca414 100644
--- a/llvm/test/MC/AMDGPU/flat-scratch-gfx940.s
+++ b/llvm/test/MC/AMDGPU/flat-scratch-gfx940.s
@@ -1,4 +1,5 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck -check-prefix=GFX940 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck -check-prefix=GFX940 %s
scratch_load_dword a2, v4, s6
// GFX940: scratch_load_dword a2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x86,0x02]
diff --git a/llvm/test/MC/AMDGPU/gfx940_asm_features.s b/llvm/test/MC/AMDGPU/gfx940_asm_features.s
index e208b6cf903d38..e2e84f27b828a4 100644
--- a/llvm/test/MC/AMDGPU/gfx940_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx940_asm_features.s
@@ -1,4 +1,5 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck --check-prefix=GFX940 --strict-whitespace %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX940 --strict-whitespace %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=NOT-GFX940,GFX90A --implicit-check-not=error: %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=NOT-GFX940,GFX10 --implicit-check-not=error: %s
diff --git a/llvm/test/MC/AMDGPU/gfx950-unsupported.s b/llvm/test/MC/AMDGPU/gfx950-unsupported.s
new file mode 100644
index 00000000000000..f8bbd40b700fd8
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx950-unsupported.s
@@ -0,0 +1,179 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+//===----------------------------------------------------------------------===//
+// v_mfma_f32_32x32x4_xf32
+//===----------------------------------------------------------------------===//
+
+v_mfma_f32_32x32x4_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], v[0:3], 1.0
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], a[0:3], 1.0
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], v[0:3], 1.0
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], a[0:3], 1.0
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+
+//===----------------------------------------------------------------------===//
+// v_mfma_f32_16x16x8_xf32
+//===----------------------------------------------------------------------===//
+
+v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], v[0:3], 1.0
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], a[0:3], 1.0
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+
+v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], v[0:3], 1.0
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], a[0:3], 1.0
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx950_invalid_encoding.txt b/llvm/test/MC/AMDGPU/gfx950_invalid_encoding.txt
new file mode 100644
index 00000000000000..0697ee8661e76d
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx950_invalid_encoding.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -disassemble -arch=amdgcn -mcpu=gfx950 -show-encoding %s 2>&1 | FileCheck --implicit-check-not=warning: --check-prefix=GFX950 %s
+
+# GFX950: warning: invalid instruction encoding
+0x00,0x80,0xbe,0xd3,0x02,0x09,0x0a,0x04
+
+# GFX950: warning: invalid instruction encoding
+0x00,0x00,0xbe,0xd3,0x02,0x09,0x0a,0x04
+
+# GFX950: warning: invalid instruction encoding
+0x00,0x00,0xbf,0xd3,0x02,0x09,0x0a,0x04
+
+# GFX950: warning: invalid instruction encoding
+0x00,0x80,0xbf,0xd3,0x02,0x09,0x0a,0x04
\ No newline at end of file
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt
index 9575e50f16312f..63e425fdb4ec96 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt
@@ -1,4 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -disassemble -show-encoding %s | FileCheck -strict-whitespace --check-prefix=GFX940 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -disassemble -show-encoding %s | FileCheck -strict-whitespace --check-prefix=GFX940 %s
# GFX940: global_load_dword v2, v[2:3], off sc0 ; encoding: [0x00,0x80,0x51,0xdc,0x02,0x00,0x7f,0x02]
0x00,0x80,0x51,0xdc,0x02,0x00,0x7f,0x02
diff --git a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
index 9c79ea588f6247..416419b3a333f8 100644
--- a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
+++ b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
@@ -162,6 +162,10 @@
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX942 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX942 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX942 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX942 %s
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX950/' %s | yaml2obj -o %t.o.AMDGCN_GFX950
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX950 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX950 %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX950 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX950 %s
+
# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX1010/' %s | yaml2obj -o %t.o.AMDGCN_GFX1010
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1010 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1010 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX1010 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1010 %s
@@ -411,6 +415,9 @@
# ELF-AMDGCN-GFX942: EF_AMDGPU_MACH_AMDGCN_GFX942 (0x4C)
# YAML-AMDGCN-GFX942: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX942 ]
+# ELF-AMDGCN-GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F)
+# YAML-AMDGCN-GFX950: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX950 ]
+
# ELF-AMDGCN-GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
# YAML-AMDGCN-GFX1010: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1010 ]
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
index 45071ecb751321..475f6f6d8322c7 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
@@ -146,6 +146,11 @@ define amdgpu_kernel void @test_kernel() {
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx9-generic -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx9-generic %t.o > %t-specify.txt
; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+;
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx950 %t.o > %t-specify.txt
+; RUN: llvm-objdump -D %t.o > %t-detect.txt
+;
; RUN: diff %t-specify.txt %t-detect.txt
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=obj -O0 -o %t.o %s
diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
index 34c22dca3aa183..7de64a6edfe2e6 100644
--- a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
+++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
@@ -223,6 +223,15 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942 -DFLAG_VALUE=0x4C
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950 -DFLAG_VALUE=0x4F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950 -DFLAG_VALUE=0x4F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950 -DFLAG_VALUE=0x4F
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 -DFLAG_VALUE=0x33
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 1012cd020d525e..6360a169cbeda9 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1617,6 +1617,7 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX950, "gfx950"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 96cb79b7d071c5..c76ad018ab4fe7 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -43,7 +43,7 @@ set(include_directory ${devicertl_base_directory}/include)
set(source_directory ${devicertl_base_directory}/src)
set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
- "gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx1010"
+ "gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx950;gfx1010"
"gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035"
"gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
"gfx1151;gfx1152;gfx1153")
More information about the cfe-commits mailing list