[clang] [llvm] AMDGPU: Add gfx950 subtarget definitions (PR #116307)
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Mon Nov 18 10:39:12 PST 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/116307
From c5735e124e3c579127773e6106ddf361e8f3d14b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 17 Nov 2023 17:49:52 +0900
Subject: [PATCH 1/3] AMDGPU: Add gfx950 subtarget definitions
Mostly a stub, but adds some baseline tests and
tests for removed instructions.
---
clang/docs/ReleaseNotes.rst | 2 +
clang/include/clang/Basic/Cuda.h | 1 +
clang/lib/Basic/Cuda.cpp | 1 +
clang/lib/Basic/Targets/NVPTX.cpp | 1 +
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 1 +
clang/test/CodeGenOpenCL/amdgpu-features.cl | 2 +
clang/test/Driver/amdgpu-macros.cl | 1 +
clang/test/Driver/amdgpu-mcpu.cl | 2 +
.../Misc/target-invalid-cpu-note/amdgcn.c | 1 +
.../test/Misc/target-invalid-cpu-note/nvptx.c | 1 +
llvm/docs/AMDGPUUsage.rst | 9 +-
llvm/include/llvm/BinaryFormat/ELF.h | 2 +-
llvm/include/llvm/TargetParser/TargetParser.h | 25 +-
llvm/lib/Object/ELFObjectFile.cpp | 2 +
llvm/lib/ObjectYAML/ELFYAML.cpp | 1 +
llvm/lib/Target/AMDGPU/AMDGPU.td | 16 +
llvm/lib/Target/AMDGPU/GCNProcessors.td | 4 +
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 1 +
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 2 +
llvm/lib/TargetParser/TargetParser.cpp | 11 +-
llvm/test/CodeGen/AMDGPU/bf16-conversions.ll | 345 ++--
.../CodeGen/AMDGPU/directive-amdgcn-target.ll | 6 +
.../CodeGen/AMDGPU/elf-header-flags-mach.ll | 2 +
.../AMDGPU/elf-header-flags-sramecc.ll | 8 +
llvm/test/CodeGen/AMDGPU/fmaximum3.ll | 594 +++++--
llvm/test/CodeGen/AMDGPU/fminimum3.ll | 594 +++++--
.../CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll | 2 +
llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll | 1224 ++++++-------
llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll | 1113 ++++++------
llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll | 1569 ++++++++---------
llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll | 1223 ++++++-------
llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll | 1113 ++++++------
llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll | 1569 ++++++++---------
llvm/test/MC/AMDGPU/flat-scratch-gfx940.s | 1 +
llvm/test/MC/AMDGPU/gfx940_asm_features.s | 1 +
llvm/test/MC/AMDGPU/gfx950-unsupported.s | 179 ++
.../MC/AMDGPU/gfx950_invalid_encoding.txt | 13 +
.../Disassembler/AMDGPU/gfx940_features.txt | 1 +
.../Object/AMDGPU/elf-header-flags-mach.yaml | 7 +
.../llvm-objdump/ELF/AMDGPU/subtarget.ll | 5 +
.../llvm-readobj/ELF/AMDGPU/elf-headers.test | 9 +
llvm/tools/llvm-readobj/ELFDumper.cpp | 1 +
offload/DeviceRTL/CMakeLists.txt | 2 +-
43 files changed, 5148 insertions(+), 4519 deletions(-)
create mode 100644 llvm/test/MC/AMDGPU/gfx950-unsupported.s
create mode 100644 llvm/test/MC/AMDGPU/gfx950_invalid_encoding.txt
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 2bd67138ecc048..0efe62f1804cd0 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -712,6 +712,8 @@ Target Specific Changes
AMDGPU Support
^^^^^^^^^^^^^^
+- Initial support for gfx950
+
- Added headers ``gpuintrin.h`` and ``amdgpuintrin.h`` that contains common
definitions for GPU builtin functions. This header can be included for OpenMP,
CUDA, HIP, OpenCL, and C/C++.
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index 721e8981af6ffc..c2a4addf488df1 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -107,6 +107,7 @@ enum class OffloadArch {
GFX940,
GFX941,
GFX942,
+ GFX950,
GFX10_1_GENERIC,
GFX1010,
GFX1011,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 59c932468cd891..d56609a2a8f24a 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -125,6 +125,7 @@ static const OffloadArchToStringMap arch_names[] = {
GFX(940), // gfx940
GFX(941), // gfx941
GFX(942), // gfx942
+ GFX(950), // gfx950
{OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
GFX(1010), // gfx1010
GFX(1011), // gfx1011
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 0897032c4b8546..dbc3fec3657610 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -209,6 +209,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case OffloadArch::GFX940:
case OffloadArch::GFX941:
case OffloadArch::GFX942:
+ case OffloadArch::GFX950:
case OffloadArch::GFX10_1_GENERIC:
case OffloadArch::GFX1010:
case OffloadArch::GFX1011:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 73e3f9e256f0d9..756f0482b8ea72 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2304,6 +2304,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
case OffloadArch::GFX940:
case OffloadArch::GFX941:
case OffloadArch::GFX942:
+ case OffloadArch::GFX950:
case OffloadArch::GFX10_1_GENERIC:
case OffloadArch::GFX1010:
case OffloadArch::GFX1011:
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index 8b56ec94f2c4ee..5c324032b51956 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -32,6 +32,7 @@
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx940 -emit-llvm -o - %s | FileCheck --check-prefix=GFX940 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx941 -emit-llvm -o - %s | FileCheck --check-prefix=GFX941 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx950 -emit-llvm -o - %s | FileCheck --check-prefix=GFX950 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
@@ -88,6 +89,7 @@
// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX950: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl
index d354f933c5ad78..d97b2ddb1fc663 100644
--- a/clang/test/Driver/amdgpu-macros.cl
+++ b/clang/test/Driver/amdgpu-macros.cl
@@ -110,6 +110,7 @@
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx940 -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx941 -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx942 -DFAMILY=GFX9
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx950 -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1010 -DFAMILY=GFX10
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1011 -DFAMILY=GFX10
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1012 -DFAMILY=GFX10
diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl
index ba578435072985..7c34d3ec6c63a9 100644
--- a/clang/test/Driver/amdgpu-mcpu.cl
+++ b/clang/test/Driver/amdgpu-mcpu.cl
@@ -95,6 +95,7 @@
// RUN: %clang -### -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefix=GFX941 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefix=GFX942 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --check-prefix=GFX950 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX1010 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefix=GFX1011 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefix=GFX1012 %s
@@ -150,6 +151,7 @@
// GFX940: "-target-cpu" "gfx940"
// GFX941: "-target-cpu" "gfx941"
// GFX942: "-target-cpu" "gfx942"
+// GFX950: "-target-cpu" "gfx950"
// GFX1010: "-target-cpu" "gfx1010"
// GFX1011: "-target-cpu" "gfx1011"
// GFX1012: "-target-cpu" "gfx1012"
diff --git a/clang/test/Misc/target-invalid-cpu-note/amdgcn.c b/clang/test/Misc/target-invalid-cpu-note/amdgcn.c
index 4e675871f1e5bd..642d2df211c21a 100644
--- a/clang/test/Misc/target-invalid-cpu-note/amdgcn.c
+++ b/clang/test/Misc/target-invalid-cpu-note/amdgcn.c
@@ -48,6 +48,7 @@
// CHECK-SAME: {{^}}, gfx940
// CHECK-SAME: {{^}}, gfx941
// CHECK-SAME: {{^}}, gfx942
+// CHECK-SAME: {{^}}, gfx950
// CHECK-SAME: {{^}}, gfx1010
// CHECK-SAME: {{^}}, gfx1011
// CHECK-SAME: {{^}}, gfx1012
diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
index 44fe07065b2428..3ea6c02d6b3846 100644
--- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c
+++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
@@ -54,6 +54,7 @@
// CHECK-SAME: {{^}}, gfx940
// CHECK-SAME: {{^}}, gfx941
// CHECK-SAME: {{^}}, gfx942
+// CHECK-SAME: {{^}}, gfx950
// CHECK-SAME: {{^}}, gfx10-1-generic
// CHECK-SAME: {{^}}, gfx1010
// CHECK-SAME: {{^}}, gfx1011
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index c180ca5fcebef3..b85b680b9c82d3 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -399,6 +399,13 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
work-item
IDs
+ ``gfx950`` ``amdgcn`` dGPU - sramecc - Architected *TBA*
+ - tgsplit flat
+ - xnack scratch .. TODO::
+ - kernarg preload - Packed
+ work-item Add product
+ IDs names.
+
**GCN GFX10.1 (RDNA 1)** [AMD-GCN-GFX10-RDNA1]_
-----------------------------------------------------------------------------------------------------------------------
``gfx1010`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5700
@@ -2178,7 +2185,7 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
*reserved* 0x04d Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
- *reserved* 0x04f Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX950`` 0x04f ``gfx950``
*reserved* 0x050 Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC`` 0x051 ``gfx9-generic``
``EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC`` 0x052 ``gfx10-1-generic``
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 6c05ea7208e1f1..fd32a6ec19652b 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -811,7 +811,7 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
- EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4F = 0x04f,
+ EF_AMDGPU_MACH_AMDGCN_GFX950 = 0x04f,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050,
EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h
index c6db4dfd7f5159..55e7b417428c4e 100644
--- a/llvm/include/llvm/TargetParser/TargetParser.h
+++ b/llvm/include/llvm/TargetParser/TargetParser.h
@@ -86,18 +86,19 @@ enum GPUKind : uint32_t {
GK_GFX940 = 68,
GK_GFX941 = 69,
GK_GFX942 = 70,
-
- GK_GFX1010 = 71,
- GK_GFX1011 = 72,
- GK_GFX1012 = 73,
- GK_GFX1013 = 74,
- GK_GFX1030 = 75,
- GK_GFX1031 = 76,
- GK_GFX1032 = 77,
- GK_GFX1033 = 78,
- GK_GFX1034 = 79,
- GK_GFX1035 = 80,
- GK_GFX1036 = 81,
+ GK_GFX950 = 71,
+
+ GK_GFX1010 = 72,
+ GK_GFX1011 = 73,
+ GK_GFX1012 = 74,
+ GK_GFX1013 = 75,
+ GK_GFX1030 = 76,
+ GK_GFX1031 = 77,
+ GK_GFX1032 = 78,
+ GK_GFX1033 = 79,
+ GK_GFX1034 = 80,
+ GK_GFX1035 = 81,
+ GK_GFX1036 = 82,
GK_GFX1100 = 90,
GK_GFX1101 = 91,
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 9dc39936ffd8bb..2ffb2ac5e7e453 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -550,6 +550,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx941";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942:
return "gfx942";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950:
+ return "gfx950";
// AMDGCN GFX10.
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010:
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 130b8798ab4a46..ca0ea03452d3be 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -609,6 +609,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX941, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX942, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX950, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index d7feaef8c4a97d..d028c1f5ca7613 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -360,6 +360,12 @@ def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts",
"Additional instructions for GFX940+"
>;
+def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
+ "GFX950Insts",
+ "true",
+ "Additional instructions for GFX950+"
+>;
+
def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
"GFX10Insts",
"true",
@@ -1470,6 +1476,14 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeatureFlatBufferGlobalAtomicFaddF64Inst
]>;
+def FeatureISAVersion9_5_Common : FeatureSet<
+ !listconcat(FeatureISAVersion9_4_Common.Features,
+ [FeatureFP8Insts,
+ FeatureFP8ConversionInsts,
+ FeatureCvtFP8VOP1Bug,
+ FeatureGFX950Insts
+ ])>;
+
def FeatureISAVersion9_4_0 : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[
@@ -1503,6 +1517,8 @@ def FeatureISAVersion9_4_Generic : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[FeatureRequiresCOV6])>;
+def FeatureISAVersion9_5_0 : FeatureSet<FeatureISAVersion9_5_Common.Features>;
+
def FeatureISAVersion10_Common : FeatureSet<
[FeatureGFX10,
FeatureLDSBankCount32,
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index 067043d290b760..3403cbab526d46 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -204,6 +204,10 @@ def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel,
FeatureISAVersion9_4_2.Features
>;
+def : ProcessorModel<"gfx950", SIDPGFX940FullSpeedModel,
+ FeatureISAVersion9_5_0.Features
+>;
+
// [gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c]
def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel,
FeatureISAVersion9_Generic.Features
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 6ff964077d8fd0..1b06756a8a1016 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -106,6 +106,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool GFX9Insts = false;
bool GFX90AInsts = false;
bool GFX940Insts = false;
+ bool GFX950Insts = false;
bool GFX10Insts = false;
bool GFX11Insts = false;
bool GFX12Insts = false;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 55ba5ebbebb8fd..ffde4d33f1341a 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -96,6 +96,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
@@ -182,6 +183,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941;
case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
+ case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950;
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 7dfb8c021a8a5f..b0385915f3042b 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -107,6 +107,7 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
+ {{"gfx950"}, {"gfx950"}, GK_GFX950, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
{{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
{{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
@@ -262,6 +263,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX940: return {9, 4, 0};
case GK_GFX941: return {9, 4, 1};
case GK_GFX942: return {9, 4, 2};
+ case GK_GFX950: return {9, 5, 0};
case GK_GFX1010: return {10, 1, 0};
case GK_GFX1011: return {10, 1, 1};
case GK_GFX1012: return {10, 1, 2};
@@ -361,7 +363,8 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["wavefrontsize32"] = true;
Features["wavefrontsize64"] = true;
} else if (T.isAMDGCN()) {
- switch (parseArchAMDGCN(GPU)) {
+ AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
+ switch (Kind) {
case GK_GFX1201:
case GK_GFX1200:
case GK_GFX12_GENERIC:
@@ -466,12 +469,16 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["s-memtime-inst"] = true;
Features["gws"] = true;
break;
+ case GK_GFX950:
+ Features["gfx950-insts"] = true;
+ [[fallthrough]];
case GK_GFX942:
case GK_GFX941:
case GK_GFX940:
Features["fp8-insts"] = true;
Features["fp8-conversion-insts"] = true;
- Features["xf32-insts"] = true;
+ if (Kind != GK_GFX950)
+ Features["xf32-insts"] = true;
[[fallthrough]];
case GK_GFX9_4_GENERIC:
Features["gfx940-insts"] = true;
diff --git a/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll b/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll
index 1c9f35dd45feeb..425fc5884cec7f 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -march=amdgcn -mcpu=gfx940 < %s | FileCheck --check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx940 < %s | FileCheck --check-prefixes=GCN,GFX-940 %s
+; RUN: llc -march=amdgcn -mcpu=gfx950 < %s | FileCheck --check-prefixes=GCN,GFX-950 %s
; TODO: Add global-isel when it can support bf16
@@ -198,19 +199,33 @@ entry:
}
define amdgpu_ps void @fptrunc_f32_to_bf16(float %a, ptr %out) {
-; GCN-LABEL: fptrunc_f32_to_bf16:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: v_mov_b32_e32 v3, v2
-; GCN-NEXT: v_mov_b32_e32 v2, v1
-; GCN-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GCN-NEXT: s_movk_i32 s0, 0x7fff
-; GCN-NEXT: v_add3_u32 v1, v1, v0, s0
-; GCN-NEXT: v_or_b32_e32 v4, 0x400000, v0
-; GCN-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
-; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
-; GCN-NEXT: s_endpgm
+; GFX-940-LABEL: fptrunc_f32_to_bf16:
+; GFX-940: ; %bb.0: ; %entry
+; GFX-940-NEXT: v_mov_b32_e32 v3, v2
+; GFX-940-NEXT: v_mov_b32_e32 v2, v1
+; GFX-940-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-940-NEXT: v_add3_u32 v1, v1, v0, s0
+; GFX-940-NEXT: v_or_b32_e32 v4, 0x400000, v0
+; GFX-940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX-940-NEXT: s_nop 1
+; GFX-940-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
+; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
+; GFX-940-NEXT: s_endpgm
+;
+; GFX-950-LABEL: fptrunc_f32_to_bf16:
+; GFX-950: ; %bb.0: ; %entry
+; GFX-950-NEXT: v_mov_b32_e32 v3, v2
+; GFX-950-NEXT: v_mov_b32_e32 v2, v1
+; GFX-950-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-950-NEXT: v_add3_u32 v1, v1, v0, s0
+; GFX-950-NEXT: v_or_b32_e32 v4, 0x400000, v0
+; GFX-950-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX-950-NEXT: s_nop 1
+; GFX-950-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
+; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
+; GFX-950-NEXT: s_endpgm
entry:
%a.cvt = fptrunc float %a to bfloat
store bfloat %a.cvt, ptr %out
@@ -218,20 +233,35 @@ entry:
}
define amdgpu_ps void @fptrunc_f32_to_bf16_abs(float %a, ptr %out) {
-; GCN-LABEL: fptrunc_f32_to_bf16_abs:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: v_mov_b32_e32 v3, v2
-; GCN-NEXT: v_mov_b32_e32 v2, v1
-; GCN-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
-; GCN-NEXT: v_bfe_u32 v4, v1, 16, 1
-; GCN-NEXT: s_movk_i32 s0, 0x7fff
-; GCN-NEXT: v_add3_u32 v4, v4, v1, s0
-; GCN-NEXT: v_or_b32_e32 v1, 0x400000, v1
-; GCN-NEXT: v_cmp_u_f32_e64 vcc, |v0|, |v0|
-; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
-; GCN-NEXT: s_endpgm
+; GFX-940-LABEL: fptrunc_f32_to_bf16_abs:
+; GFX-940: ; %bb.0: ; %entry
+; GFX-940-NEXT: v_mov_b32_e32 v3, v2
+; GFX-940-NEXT: v_mov_b32_e32 v2, v1
+; GFX-940-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
+; GFX-940-NEXT: v_bfe_u32 v4, v1, 16, 1
+; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-940-NEXT: v_add3_u32 v4, v4, v1, s0
+; GFX-940-NEXT: v_or_b32_e32 v1, 0x400000, v1
+; GFX-940-NEXT: v_cmp_u_f32_e64 vcc, |v0|, |v0|
+; GFX-940-NEXT: s_nop 1
+; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
+; GFX-940-NEXT: s_endpgm
+;
+; GFX-950-LABEL: fptrunc_f32_to_bf16_abs:
+; GFX-950: ; %bb.0: ; %entry
+; GFX-950-NEXT: v_mov_b32_e32 v3, v2
+; GFX-950-NEXT: v_mov_b32_e32 v2, v1
+; GFX-950-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
+; GFX-950-NEXT: v_bfe_u32 v4, v1, 16, 1
+; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-950-NEXT: v_add3_u32 v4, v4, v1, s0
+; GFX-950-NEXT: v_or_b32_e32 v1, 0x400000, v1
+; GFX-950-NEXT: v_cmp_u_f32_e64 vcc, |v0|, |v0|
+; GFX-950-NEXT: s_nop 1
+; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
+; GFX-950-NEXT: s_endpgm
entry:
%a.abs = call float @llvm.fabs.f32(float %a)
%a.cvt = fptrunc float %a.abs to bfloat
@@ -240,20 +270,35 @@ entry:
}
define amdgpu_ps void @fptrunc_f32_to_bf16_neg(float %a, ptr %out) {
-; GCN-LABEL: fptrunc_f32_to_bf16_neg:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: v_mov_b32_e32 v3, v2
-; GCN-NEXT: v_mov_b32_e32 v2, v1
-; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
-; GCN-NEXT: v_bfe_u32 v4, v1, 16, 1
-; GCN-NEXT: s_movk_i32 s0, 0x7fff
-; GCN-NEXT: v_add3_u32 v4, v4, v1, s0
-; GCN-NEXT: v_or_b32_e32 v1, 0x400000, v1
-; GCN-NEXT: v_cmp_u_f32_e64 vcc, -v0, -v0
-; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
-; GCN-NEXT: s_endpgm
+; GFX-940-LABEL: fptrunc_f32_to_bf16_neg:
+; GFX-940: ; %bb.0: ; %entry
+; GFX-940-NEXT: v_mov_b32_e32 v3, v2
+; GFX-940-NEXT: v_mov_b32_e32 v2, v1
+; GFX-940-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
+; GFX-940-NEXT: v_bfe_u32 v4, v1, 16, 1
+; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-940-NEXT: v_add3_u32 v4, v4, v1, s0
+; GFX-940-NEXT: v_or_b32_e32 v1, 0x400000, v1
+; GFX-940-NEXT: v_cmp_u_f32_e64 vcc, -v0, -v0
+; GFX-940-NEXT: s_nop 1
+; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
+; GFX-940-NEXT: s_endpgm
+;
+; GFX-950-LABEL: fptrunc_f32_to_bf16_neg:
+; GFX-950: ; %bb.0: ; %entry
+; GFX-950-NEXT: v_mov_b32_e32 v3, v2
+; GFX-950-NEXT: v_mov_b32_e32 v2, v1
+; GFX-950-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
+; GFX-950-NEXT: v_bfe_u32 v4, v1, 16, 1
+; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-950-NEXT: v_add3_u32 v4, v4, v1, s0
+; GFX-950-NEXT: v_or_b32_e32 v1, 0x400000, v1
+; GFX-950-NEXT: v_cmp_u_f32_e64 vcc, -v0, -v0
+; GFX-950-NEXT: s_nop 1
+; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
+; GFX-950-NEXT: s_endpgm
entry:
%a.neg = fneg float %a
%a.cvt = fptrunc float %a.neg to bfloat
@@ -262,29 +307,53 @@ entry:
}
define amdgpu_ps void @fptrunc_f64_to_bf16(double %a, ptr %out) {
-; GCN-LABEL: fptrunc_f64_to_bf16:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]|
-; GCN-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
-; GCN-NEXT: v_and_b32_e32 v7, 1, v6
-; GCN-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
-; GCN-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
-; GCN-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
-; GCN-NEXT: v_add_u32_e32 v4, v6, v4
-; GCN-NEXT: s_or_b64 vcc, s[0:1], vcc
-; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; GCN-NEXT: s_brev_b32 s0, 1
-; GCN-NEXT: v_and_or_b32 v5, v1, s0, v4
-; GCN-NEXT: v_bfe_u32 v4, v4, 16, 1
-; GCN-NEXT: s_movk_i32 s0, 0x7fff
-; GCN-NEXT: v_add3_u32 v4, v4, v5, s0
-; GCN-NEXT: v_or_b32_e32 v5, 0x400000, v5
-; GCN-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
-; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
-; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
-; GCN-NEXT: s_endpgm
+; GFX-940-LABEL: fptrunc_f64_to_bf16:
+; GFX-940: ; %bb.0: ; %entry
+; GFX-940-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]|
+; GFX-940-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
+; GFX-940-NEXT: v_and_b32_e32 v7, 1, v6
+; GFX-940-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
+; GFX-940-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
+; GFX-940-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
+; GFX-940-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
+; GFX-940-NEXT: v_add_u32_e32 v4, v6, v4
+; GFX-940-NEXT: s_or_b64 vcc, s[0:1], vcc
+; GFX-940-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
+; GFX-940-NEXT: s_brev_b32 s0, 1
+; GFX-940-NEXT: v_and_or_b32 v5, v1, s0, v4
+; GFX-940-NEXT: v_bfe_u32 v4, v4, 16, 1
+; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-940-NEXT: v_add3_u32 v4, v4, v5, s0
+; GFX-940-NEXT: v_or_b32_e32 v5, 0x400000, v5
+; GFX-940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX-940-NEXT: s_nop 1
+; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
+; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
+; GFX-940-NEXT: s_endpgm
+;
+; GFX-950-LABEL: fptrunc_f64_to_bf16:
+; GFX-950: ; %bb.0: ; %entry
+; GFX-950-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]|
+; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
+; GFX-950-NEXT: v_and_b32_e32 v7, 1, v6
+; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
+; GFX-950-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
+; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
+; GFX-950-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
+; GFX-950-NEXT: v_add_u32_e32 v4, v6, v4
+; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
+; GFX-950-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
+; GFX-950-NEXT: s_brev_b32 s0, 1
+; GFX-950-NEXT: v_and_or_b32 v5, v1, s0, v4
+; GFX-950-NEXT: v_bfe_u32 v4, v4, 16, 1
+; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-950-NEXT: v_add3_u32 v4, v4, v5, s0
+; GFX-950-NEXT: v_or_b32_e32 v5, 0x400000, v5
+; GFX-950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX-950-NEXT: s_nop 1
+; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
+; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
+; GFX-950-NEXT: s_endpgm
entry:
%a.cvt = fptrunc double %a to bfloat
store bfloat %a.cvt, ptr %out
@@ -292,30 +361,55 @@ entry:
}
define amdgpu_ps void @fptrunc_f64_to_bf16_neg(double %a, ptr %out) {
-; GCN-LABEL: fptrunc_f64_to_bf16_neg:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
-; GCN-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
-; GCN-NEXT: v_and_b32_e32 v8, 1, v7
-; GCN-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
-; GCN-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
-; GCN-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
-; GCN-NEXT: v_add_u32_e32 v4, v7, v4
-; GCN-NEXT: s_or_b64 vcc, s[0:1], vcc
-; GCN-NEXT: s_brev_b32 s4, 1
-; GCN-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
-; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
-; GCN-NEXT: v_and_or_b32 v5, v6, s4, v4
-; GCN-NEXT: v_bfe_u32 v4, v4, 16, 1
-; GCN-NEXT: s_movk_i32 s0, 0x7fff
-; GCN-NEXT: v_add3_u32 v4, v4, v5, s0
-; GCN-NEXT: v_or_b32_e32 v5, 0x400000, v5
-; GCN-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
-; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
-; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
-; GCN-NEXT: s_endpgm
+; GFX-940-LABEL: fptrunc_f64_to_bf16_neg:
+; GFX-940: ; %bb.0: ; %entry
+; GFX-940-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
+; GFX-940-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
+; GFX-940-NEXT: v_and_b32_e32 v8, 1, v7
+; GFX-940-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
+; GFX-940-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
+; GFX-940-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
+; GFX-940-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
+; GFX-940-NEXT: v_add_u32_e32 v4, v7, v4
+; GFX-940-NEXT: s_or_b64 vcc, s[0:1], vcc
+; GFX-940-NEXT: s_brev_b32 s4, 1
+; GFX-940-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
+; GFX-940-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX-940-NEXT: v_and_or_b32 v5, v6, s4, v4
+; GFX-940-NEXT: v_bfe_u32 v4, v4, 16, 1
+; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-940-NEXT: v_add3_u32 v4, v4, v5, s0
+; GFX-940-NEXT: v_or_b32_e32 v5, 0x400000, v5
+; GFX-940-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
+; GFX-940-NEXT: s_nop 1
+; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
+; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
+; GFX-940-NEXT: s_endpgm
+;
+; GFX-950-LABEL: fptrunc_f64_to_bf16_neg:
+; GFX-950: ; %bb.0: ; %entry
+; GFX-950-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
+; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
+; GFX-950-NEXT: v_and_b32_e32 v8, 1, v7
+; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
+; GFX-950-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
+; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
+; GFX-950-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
+; GFX-950-NEXT: v_add_u32_e32 v4, v7, v4
+; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
+; GFX-950-NEXT: s_brev_b32 s4, 1
+; GFX-950-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
+; GFX-950-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX-950-NEXT: v_and_or_b32 v5, v6, s4, v4
+; GFX-950-NEXT: v_bfe_u32 v4, v4, 16, 1
+; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-950-NEXT: v_add3_u32 v4, v4, v5, s0
+; GFX-950-NEXT: v_or_b32_e32 v5, 0x400000, v5
+; GFX-950-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
+; GFX-950-NEXT: s_nop 1
+; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
+; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
+; GFX-950-NEXT: s_endpgm
entry:
%a.neg = fneg double %a
%a.cvt = fptrunc double %a.neg to bfloat
@@ -324,30 +418,55 @@ entry:
}
define amdgpu_ps void @fptrunc_f64_to_bf16_abs(double %a, ptr %out) {
-; GCN-LABEL: fptrunc_f64_to_bf16_abs:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
-; GCN-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
-; GCN-NEXT: v_and_b32_e32 v8, 1, v7
-; GCN-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
-; GCN-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
-; GCN-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
-; GCN-NEXT: v_add_u32_e32 v4, v7, v4
-; GCN-NEXT: s_or_b64 vcc, s[0:1], vcc
-; GCN-NEXT: v_and_b32_e32 v6, 0x7fffffff, v1
-; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
-; GCN-NEXT: s_brev_b32 s0, 1
-; GCN-NEXT: v_and_or_b32 v5, v6, s0, v4
-; GCN-NEXT: v_bfe_u32 v4, v4, 16, 1
-; GCN-NEXT: s_movk_i32 s0, 0x7fff
-; GCN-NEXT: v_add3_u32 v4, v4, v5, s0
-; GCN-NEXT: v_or_b32_e32 v5, 0x400000, v5
-; GCN-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[0:1]|
-; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
-; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
-; GCN-NEXT: s_endpgm
+; GFX-940-LABEL: fptrunc_f64_to_bf16_abs:
+; GFX-940: ; %bb.0: ; %entry
+; GFX-940-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
+; GFX-940-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
+; GFX-940-NEXT: v_and_b32_e32 v8, 1, v7
+; GFX-940-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
+; GFX-940-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
+; GFX-940-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
+; GFX-940-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
+; GFX-940-NEXT: v_add_u32_e32 v4, v7, v4
+; GFX-940-NEXT: s_or_b64 vcc, s[0:1], vcc
+; GFX-940-NEXT: v_and_b32_e32 v6, 0x7fffffff, v1
+; GFX-940-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX-940-NEXT: s_brev_b32 s0, 1
+; GFX-940-NEXT: v_and_or_b32 v5, v6, s0, v4
+; GFX-940-NEXT: v_bfe_u32 v4, v4, 16, 1
+; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-940-NEXT: v_add3_u32 v4, v4, v5, s0
+; GFX-940-NEXT: v_or_b32_e32 v5, 0x400000, v5
+; GFX-940-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[0:1]|
+; GFX-940-NEXT: s_nop 1
+; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
+; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
+; GFX-940-NEXT: s_endpgm
+;
+; GFX-950-LABEL: fptrunc_f64_to_bf16_abs:
+; GFX-950: ; %bb.0: ; %entry
+; GFX-950-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
+; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
+; GFX-950-NEXT: v_and_b32_e32 v8, 1, v7
+; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
+; GFX-950-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
+; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
+; GFX-950-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
+; GFX-950-NEXT: v_add_u32_e32 v4, v7, v4
+; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
+; GFX-950-NEXT: v_and_b32_e32 v6, 0x7fffffff, v1
+; GFX-950-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX-950-NEXT: s_brev_b32 s0, 1
+; GFX-950-NEXT: v_and_or_b32 v5, v6, s0, v4
+; GFX-950-NEXT: v_bfe_u32 v4, v4, 16, 1
+; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
+; GFX-950-NEXT: v_add3_u32 v4, v4, v5, s0
+; GFX-950-NEXT: v_or_b32_e32 v5, 0x400000, v5
+; GFX-950-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[0:1]|
+; GFX-950-NEXT: s_nop 1
+; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
+; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
+; GFX-950-NEXT: s_endpgm
entry:
%a.abs = call double @llvm.fabs.f64(double %a)
%a.cvt = fptrunc double %a.abs to bfloat
diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
index 4eac26e853c2a0..b64968c9336b93 100644
--- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
+++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
@@ -80,6 +80,9 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX942-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX942-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck --check-prefixes=GFX950 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX950-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX950-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX1010 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1010-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1010-XNACK %s
@@ -180,6 +183,9 @@
; GFX942: .amdgcn_target "amdgcn-amd-amdhsa--gfx942"
; GFX942-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack-"
; GFX942-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack+"
+; GFX950: .amdgcn_target "amdgcn-amd-amdhsa--gfx950"
+; GFX950-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx950:xnack-"
+; GFX950-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx950:xnack+"
; GFX1010: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010"
; GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack-"
; GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack+"
diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
index f1f4edb94a6178..99344f16d4cd68 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
@@ -57,6 +57,7 @@
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX940 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx941 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX941 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx942 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX942 %s
+; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX950 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1010 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1011 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1011 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1012 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1012 %s
@@ -139,6 +140,7 @@
; GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
; GFX941: EF_AMDGPU_MACH_AMDGCN_GFX941 (0x4B)
; GFX942: EF_AMDGPU_MACH_AMDGCN_GFX942 (0x4C)
+; GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F)
; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
; GFX1011: EF_AMDGPU_MACH_AMDGCN_GFX1011 (0x34)
; GFX1012: EF_AMDGPU_MACH_AMDGCN_GFX1012 (0x35)
diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll
index 961b89ab28f623..3ad2a9df764be5 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll
@@ -12,6 +12,9 @@
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX940 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX940 %s
+; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s
+; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s
+
; NO-SRAM-ECC-GFX906: Flags [
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
@@ -44,6 +47,11 @@
; SRAM-ECC-GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
; SRAM-ECC-GFX940: ]
+; SRAM-ECC-GFX950: Flags [
+; SRAM-ECC-GFX950: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
+; SRAM-ECC-GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F)
+; SRAM-ECC-GFX950: ]
+
define amdgpu_kernel void @elf_header() {
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index 27282a453075b3..08122cd0d89eab 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
define float @v_fmaximum3_f32(float %a, float %b, float %c) {
; GFX12-LABEL: v_fmaximum3_f32:
@@ -19,9 +20,11 @@ define float @v_fmaximum3_f32(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -46,9 +49,11 @@ define float @v_fmaximum3_f32_commute(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v1, v2, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -71,10 +76,13 @@ define amdgpu_ps i32 @s_fmaximum3_f32(float inreg %a, float inreg %b, float inre
; GFX9-NEXT: v_max_f32_e32 v1, s0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_max_f32_e32 v1, s2, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
%max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -101,9 +109,11 @@ define float @v_fmaximum3_f32_fabs0(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e64 v3, |v0|, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
@@ -129,9 +139,11 @@ define float @v_fmaximum3_f32_fabs1(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e64 v3, v0, |v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%b.fabs = call float @llvm.fabs.f32(float %b)
@@ -157,9 +169,11 @@ define float @v_fmaximum3_f32_fabs2(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v1, v0, |v2|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%c.fabs = call float @llvm.fabs.f32(float %c)
@@ -185,9 +199,11 @@ define float @v_fmaximum3_f32_fabs_all(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e64 v3, |v0|, |v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v1, v0, |v2|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
@@ -215,9 +231,11 @@ define float @v_fmaximum3_f32_fneg_all(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e64 v3, -v0, -v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v1, v0, -v2
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
@@ -245,9 +263,11 @@ define float @v_fmaximum3_f32_fneg_fabs_all(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e64 v3, -|v0|, -|v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v1, v0, -|v2|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
@@ -278,9 +298,11 @@ define float @v_fmaximum3_f32_fneg0(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e64 v3, -v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
@@ -306,9 +328,11 @@ define float @v_fmaximum3_f32_fneg1(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e64 v3, v0, -v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%b.fneg = fneg float %b
@@ -334,9 +358,11 @@ define float @v_fmaximum3_f32_fneg2(float %a, float %b, float %c) {
; GFX9-NEXT: v_max_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v1, v0, -v2
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%c.fneg = fneg float %c
@@ -362,9 +388,11 @@ define float @v_fmaximum3_f32_const0(float %b, float %c) {
; GFX9-NEXT: v_max_f32_e32 v2, 0x41000000, v0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float 8.0, float %b)
@@ -389,9 +417,11 @@ define float @v_fmaximum3_f32__const2(float %a, float %b) {
; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_max_f32_e32 v1, 0x41000000, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -416,9 +446,11 @@ define float @v_fmaximum3_f32_inlineimm0(float %b, float %c) {
; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float 4.0, float %b)
@@ -443,9 +475,11 @@ define float @v_fmaximum3_f32__inlineimm(float %a, float %b) {
; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_max_f32_e32 v1, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -472,9 +506,11 @@ define float @v_fmaximum3_f32_const1_const2(float %a) {
; GFX9-NEXT: v_max_f32_e32 v1, 0x41000000, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_max_f32_e32 v1, 0x41800000, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float 8.0)
@@ -500,15 +536,19 @@ define <2 x float> @v_fmaximum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float
; GFX9-NEXT: v_max_f32_e32 v6, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v0, v2
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v2, v4, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX9-NEXT: v_max_f32_e32 v2, v5, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
@@ -534,15 +574,19 @@ define <2 x float> @v_fmaximum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2
; GFX9-NEXT: v_max_f32_e32 v6, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v0, v2
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v2, v0, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX9-NEXT: v_max_f32_e32 v2, v1, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
@@ -568,15 +612,19 @@ define <2 x float> @v_fmaximum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b,
; GFX9-NEXT: v_max_f32_e64 v6, |v1|, |v3|
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_max_f32_e64 v3, |v0|, |v2|
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v2, v0, |v4|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX9-NEXT: v_max_f32_e64 v2, v1, |v5|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
@@ -605,15 +653,19 @@ define <2 x float> @v_fmaximum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b,
; GFX9-NEXT: v_max_f32_e64 v6, -v1, -v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_max_f32_e64 v3, -v0, -v2
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v2, v0, -v4
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX9-NEXT: v_max_f32_e64 v2, v1, -v5
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <2 x float> %a
@@ -642,15 +694,19 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c
; GFX9-NEXT: v_max_f32_e32 v4, 2.0, v1
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
; GFX9-NEXT: v_max_f32_e32 v4, 2.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_max_f32_e32 v4, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_max_f32_e32 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> <float 2.0, float 2.0>)
@@ -676,15 +732,19 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b
; GFX9-NEXT: v_max_f32_e32 v4, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v0, v2
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
@@ -711,21 +771,27 @@ define <3 x float> @v_fmaximum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float
; GFX9-NEXT: v_max_f32_e32 v9, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_max_f32_e32 v5, v1, v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_max_f32_e32 v4, v0, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v6, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v7, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v8, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
@@ -752,21 +818,27 @@ define <3 x float> @v_fmaximum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3
; GFX9-NEXT: v_max_f32_e32 v9, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_max_f32_e32 v5, v1, v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_max_f32_e32 v4, v0, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v0, v6
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v6
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v1, v7
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v7
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v2, v8
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v8
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
@@ -793,21 +865,27 @@ define <3 x float> @v_fmaximum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b,
; GFX9-NEXT: v_max_f32_e64 v9, |v2|, |v5|
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5|
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_max_f32_e64 v5, |v1|, |v4|
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4|
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_max_f32_e64 v4, |v0|, |v3|
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX9-NEXT: v_max_f32_e64 v3, v0, |v6|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v3, v1, |v7|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v3, v2, |v8|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a)
@@ -837,21 +915,27 @@ define <3 x float> @v_fmaximum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b,
; GFX9-NEXT: v_max_f32_e64 v9, -v2, -v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_max_f32_e64 v5, -v1, -v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_max_f32_e64 v4, -v0, -v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX9-NEXT: v_max_f32_e64 v3, v0, -v6
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v3, v1, -v7
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX9-NEXT: v_max_f32_e64 v3, v2, -v8
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <3 x float> %a
@@ -881,21 +965,27 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c
; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_max_f32_e32 v6, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v1, v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v3, v2, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> <float 2.0, float 2.0, float 2.0>)
@@ -922,21 +1012,27 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b
; GFX9-NEXT: v_max_f32_e32 v6, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
; GFX9-NEXT: v_max_f32_e32 v5, v1, v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
; GFX9-NEXT: v_max_f32_e32 v4, v0, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc
; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
@@ -962,9 +1058,11 @@ define half @v_fmaximum3_f16(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.maximum.f16(half %a, half %b)
@@ -989,9 +1087,11 @@ define half @v_fmaximum3_f16_commute(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e32 v1, v2, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.maximum.f16(half %a, half %b)
@@ -1016,11 +1116,14 @@ define amdgpu_ps i32 @s_fmaximum3_f16(half inreg %a, half inreg %b, half inreg %
; GFX9-NEXT: v_max_f16_e32 v1, s0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_max_f16_e32 v1, s2, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s2, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
%max0 = call half @llvm.maximum.f16(half %a, half %b)
@@ -1048,9 +1151,11 @@ define half @v_fmaximum3_f16_fabs0(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e64 v3, |v0|, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call half @llvm.fabs.f16(half %a)
@@ -1076,9 +1181,11 @@ define half @v_fmaximum3_f16_fabs1(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e64 v3, v0, |v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%b.fabs = call half @llvm.fabs.f16(half %b)
@@ -1104,9 +1211,11 @@ define half @v_fmaximum3_f16_fabs2(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e64 v1, v0, |v2|
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%c.fabs = call half @llvm.fabs.f16(half %c)
@@ -1132,9 +1241,11 @@ define half @v_fmaximum3_f16_fabs_all(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e64 v3, |v0|, |v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e64 v1, v0, |v2|
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call half @llvm.fabs.f16(half %a)
@@ -1162,9 +1273,11 @@ define half @v_fmaximum3_f16_fneg_all(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e64 v3, -v0, -v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e64 v1, v0, -v2
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg half %a
@@ -1192,9 +1305,11 @@ define half @v_fmaximum3_f16_fneg_fabs_all(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e64 v3, -|v0|, -|v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -|v0|, -|v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e64 v1, v0, -|v2|
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -|v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call half @llvm.fabs.f16(half %a)
@@ -1225,9 +1340,11 @@ define half @v_fmaximum3_f16_fneg0(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e64 v3, -v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg half %a
@@ -1253,9 +1370,11 @@ define half @v_fmaximum3_f16_fneg1(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e64 v3, v0, -v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%b.fneg = fneg half %b
@@ -1281,9 +1400,11 @@ define half @v_fmaximum3_f16_fneg2(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e64 v1, v0, -v2
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%c.fneg = fneg half %c
@@ -1309,9 +1430,11 @@ define half @v_fmaximum3_f16_const0(half %b, half %c) {
; GFX9-NEXT: v_max_f16_e32 v2, 0x4800, v0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.maximum.f16(half 8.0, half %b)
@@ -1336,9 +1459,11 @@ define half @v_fmaximum3_f16__const2(half %a, half %b) {
; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_max_f16_e32 v1, 0x4800, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.maximum.f16(half %a, half %b)
@@ -1363,9 +1488,11 @@ define half @v_fmaximum3_f16_inlineimm0(half %b, half %c) {
; GFX9-NEXT: v_max_f16_e32 v2, 4.0, v0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.maximum.f16(half 4.0, half %b)
@@ -1390,9 +1517,11 @@ define half @v_fmaximum3_f16__inlineimm(half %a, half %b) {
; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_max_f16_e32 v1, 4.0, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.maximum.f16(half %a, half %b)
@@ -1419,9 +1548,11 @@ define half @v_fmaximum3_f16_const1_const2(half %a) {
; GFX9-NEXT: v_max_f16_e32 v1, 0x4800, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_max_f16_e32 v1, 0x4c00, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.maximum.f16(half %a, half 8.0)
@@ -1448,19 +1579,23 @@ define <2 x half> @v_fmaximum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c
; GFX9-NEXT: v_pk_max_f16 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v0, v5, s4
+; GFX9-NEXT: v_perm_b32 v1, v0, v5, s0
; GFX9-NEXT: v_pk_max_f16 v1, v2, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %c, <2 x half> %max0)
@@ -1486,19 +1621,23 @@ define <2 x half> @v_fmaximum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x
; GFX9-NEXT: v_pk_max_f16 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v0, v5, s4
+; GFX9-NEXT: v_perm_b32 v1, v0, v5, s0
; GFX9-NEXT: v_pk_max_f16 v1, v1, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> %c)
@@ -1527,22 +1666,25 @@ define <2 x half> @v_fmaximum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v0
; GFX9-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v1
; GFX9-NEXT: v_pk_max_f16 v3, v3, v4
-; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v2
; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v2
-; GFX9-NEXT: v_perm_b32 v1, v4, v0, s4
+; GFX9-NEXT: v_perm_b32 v1, v4, v0, s0
; GFX9-NEXT: v_pk_max_f16 v1, v1, v5
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v3, v0, s4
+; GFX9-NEXT: v_perm_b32 v0, v3, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a)
%b.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %b)
@@ -1571,19 +1713,23 @@ define <2 x half> @v_fmaximum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2
; GFX9-NEXT: v_pk_max_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v0, v5, s4
+; GFX9-NEXT: v_perm_b32 v1, v0, v5, s0
; GFX9-NEXT: v_pk_max_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v5, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <2 x half> %a
%b.fneg = fneg <2 x half> %b
@@ -1610,21 +1756,25 @@ define <2 x half> @v_fmaximum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_max_f16 v2, v0, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v3, v0, s4
+; GFX9-NEXT: v_perm_b32 v2, v3, v0, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, v1
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v3, v0, s4
+; GFX9-NEXT: v_perm_b32 v0, v3, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 2.0>)
%max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> %c)
@@ -1650,19 +1800,23 @@ define <2 x half> @v_fmaximum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) {
; GFX9-NEXT: v_pk_max_f16 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v0, v4, s4
+; GFX9-NEXT: v_perm_b32 v1, v0, v4, s0
; GFX9-NEXT: v_pk_max_f16 v1, v1, 4.0 op_sel_hi:[1,0]
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v2, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v2, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> <half 4.0, half 4.0>)
@@ -1690,29 +1844,35 @@ define <3 x half> @v_fmaximum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c
; GFX9-NEXT: v_pk_max_f16 v6, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_max_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v6, s0
; GFX9-NEXT: v_pk_max_f16 v1, v5, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v6
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_max_f16 v2, v4, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v8
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b)
%max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %c, <3 x half> %max0)
@@ -1740,29 +1900,35 @@ define <3 x half> @v_fmaximum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x
; GFX9-NEXT: v_pk_max_f16 v6, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_max_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v6, s0
; GFX9-NEXT: v_pk_max_f16 v1, v1, v5
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v5
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, v4
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v8, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b)
%max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> %c)
@@ -1799,33 +1965,37 @@ define <3 x half> @v_fmaximum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3
; GFX9-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v0
; GFX9-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v2
; GFX9-NEXT: v_pk_max_f16 v7, v7, v9
-; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX9-NEXT: v_mov_b32_e32 v12, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_pk_max_f16 v6, v6, v8
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
; GFX9-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v4
+; GFX9-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v5
; GFX9-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v12, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v4
-; GFX9-NEXT: v_perm_b32 v2, v8, v0, s4
+; GFX9-NEXT: v_perm_b32 v2, v8, v0, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, v11
-; GFX9-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v5
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT: v_perm_b32 v6, v9, v1, s4
+; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX9-NEXT: v_perm_b32 v6, v9, v1, s0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc
; GFX9-NEXT: v_pk_max_f16 v6, v6, v10
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v3, v0, s4
+; GFX9-NEXT: v_perm_b32 v0, v3, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %a)
%b.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %b)
@@ -1856,29 +2026,35 @@ define <3 x half> @v_fmaximum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3
; GFX9-NEXT: v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v6, s0
; GFX9-NEXT: v_pk_max_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <3 x half> %a
%b.fneg = fneg <3 x half> %b
@@ -1907,29 +2083,34 @@ define <3 x half> @v_fmaximum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX9-NEXT: v_pk_max_f16 v7, v1, 2.0
+; GFX9-NEXT: s_mov_b32 s1, 0x5040100
+; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v1
+; GFX9-NEXT: s_movk_i32 s0, 0x7e00
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX9-NEXT: s_mov_b32 s5, 0x5040100
-; GFX9-NEXT: v_perm_b32 v4, v5, v0, s5
+; GFX9-NEXT: v_perm_b32 v4, v5, v0, s1
; GFX9-NEXT: v_pk_max_f16 v4, v4, v2
-; GFX9-NEXT: s_movk_i32 s4, 0x7e00
-; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v4
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc
-; GFX9-NEXT: v_pack_b32_f16 v7, v1, s4
+; GFX9-NEXT: v_pack_b32_f16 v7, v1, s0
; GFX9-NEXT: v_pk_max_f16 v7, v7, v3
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX9-NEXT: v_perm_b32 v0, v5, v0, s5
+; GFX9-NEXT: v_perm_b32 v0, v5, v0, s1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> <half 2.0, half 2.0, half 2.0>)
%max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> %c)
@@ -1957,29 +2138,35 @@ define <3 x half> @v_fmaximum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) {
; GFX9-NEXT: v_pk_max_f16 v4, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_pk_max_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v1, v4, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v4, s0
; GFX9-NEXT: v_pk_max_f16 v1, v1, 4.0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v4
-; GFX9-NEXT: v_perm_b32 v2, v0, v6, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX9-NEXT: v_perm_b32 v2, v0, v6, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v6
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b)
%max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> <half 4.0, half 4.0, half 4.0>)
@@ -2007,33 +2194,41 @@ define <4 x half> @v_fmaximum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c
; GFX9-NEXT: v_pk_max_f16 v6, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_max_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v2, v1, v6, s0
; GFX9-NEXT: v_pk_max_f16 v2, v5, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v6
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_max_f16 v2, v4, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v8
+; GFX9-NEXT: v_perm_b32 v1, v1, v3, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v5, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v5, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
%max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %c, <4 x half> %max0)
@@ -2061,33 +2256,41 @@ define <4 x half> @v_fmaximum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x
; GFX9-NEXT: v_pk_max_f16 v6, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_max_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v2, v1, v6, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, v5
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, v4
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v8, v4
+; GFX9-NEXT: v_perm_b32 v1, v1, v3, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v5, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v5, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
%max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> %c)
@@ -2124,37 +2327,43 @@ define <4 x half> @v_fmaximum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4
; GFX9-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v1
; GFX9-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v3
; GFX9-NEXT: v_pk_max_f16 v7, v7, v9
-; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX9-NEXT: v_mov_b32_e32 v12, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_pk_max_f16 v6, v6, v8
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
; GFX9-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v5
+; GFX9-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v4
; GFX9-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v5
-; GFX9-NEXT: v_perm_b32 v2, v8, v1, s4
-; GFX9-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v4
+; GFX9-NEXT: v_perm_b32 v2, v8, v1, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, v11
-; GFX9-NEXT: v_perm_b32 v6, v9, v0, s4
+; GFX9-NEXT: v_perm_b32 v6, v9, v0, s0
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT: v_pk_max_f16 v6, v6, v10
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v7, v12, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v12, v2, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4|
+; GFX9-NEXT: v_perm_b32 v1, v3, v1, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc
-; GFX9-NEXT: v_perm_b32 v0, v7, v0, s4
-; GFX9-NEXT: v_perm_b32 v1, v3, v1, s4
+; GFX9-NEXT: v_perm_b32 v0, v7, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
%b.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
@@ -2185,33 +2394,41 @@ define <4 x half> @v_fmaximum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4
; GFX9-NEXT: v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v2, v1, v6, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4
+; GFX9-NEXT: v_perm_b32 v1, v1, v3, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v5, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v5, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <4 x half> %a
%b.fneg = fneg <4 x half> %b
@@ -2240,35 +2457,41 @@ define <4 x half> @v_fmaximum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_pk_max_f16 v7, v1, 2.0 op_sel_hi:[1,0]
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v7
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v8, v6, v8, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v4, v8, v1, s4
+; GFX9-NEXT: v_perm_b32 v4, v8, v1, s0
; GFX9-NEXT: v_pk_max_f16 v4, v4, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT: v_perm_b32 v8, v5, v0, s4
+; GFX9-NEXT: v_perm_b32 v8, v5, v0, s0
; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v4
; GFX9-NEXT: v_pk_max_f16 v8, v8, v2
; GFX9-NEXT: v_cndmask_b32_e32 v7, v6, v7, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v8
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v9, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: v_perm_b32 v1, v7, v1, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc
-; GFX9-NEXT: v_perm_b32 v0, v5, v0, s4
-; GFX9-NEXT: v_perm_b32 v1, v7, v1, s4
+; GFX9-NEXT: v_perm_b32 v0, v5, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> <half 2.0, half 2.0, half 2.0, half 2.0>)
%max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> %c)
@@ -2296,33 +2519,41 @@ define <4 x half> @v_fmaximum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) {
; GFX9-NEXT: v_pk_max_f16 v4, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_pk_max_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v1, v4, s4
+; GFX9-NEXT: v_perm_b32 v2, v1, v4, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: v_perm_b32 v2, v0, v6, s4
+; GFX9-NEXT: v_perm_b32 v2, v0, v6, s0
; GFX9-NEXT: v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v6
+; GFX9-NEXT: v_perm_b32 v1, v1, v3, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v4, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
%max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> <half 4.0, half 4.0, half 4.0, half 4.0>)
@@ -2346,12 +2577,14 @@ define double @v_fmaximum3_f64(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2377,12 +2610,14 @@ define double @v_fmaximum3_f64_commute(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[4:5], v[0:1]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2404,19 +2639,20 @@ define amdgpu_ps <2 x i32> @s_fmaximum3_f64(double inreg %a, double inreg %b, do
;
; GFX9-LABEL: s_fmaximum3_f64:
; GFX9: ; %bb.0:
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
; GFX9-NEXT: v_max_f64 v[2:3], s[0:1], v[0:1]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[4:5], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc
-; GFX9-NEXT: v_readfirstlane_b32 s0, v1
; GFX9-NEXT: v_readfirstlane_b32 s1, v0
+; GFX9-NEXT: v_readfirstlane_b32 s0, v1
; GFX9-NEXT: ; return to shader part epilog
%max0 = call double @llvm.maximum.f64(double %a, double %b)
%max1 = call double @llvm.maximum.f64(double %max0, double %c)
@@ -2447,12 +2683,14 @@ define double @v_fmaximum3_f64_fabs0(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], |v[0:1]|, v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2479,12 +2717,14 @@ define double @v_fmaximum3_f64_fabs1(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], |v[2:3]|
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2511,12 +2751,14 @@ define double @v_fmaximum3_f64_fabs2(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], |v[4:5]|
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2543,12 +2785,14 @@ define double @v_fmaximum3_f64_fabs_all(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], |v[0:1]|, |v[2:3]|
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], |v[4:5]|
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2577,12 +2821,14 @@ define double @v_fmaximum3_f64_fneg_all(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], -v[0:1], -v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], -v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2611,12 +2857,14 @@ define double @v_fmaximum3_f64_fneg_fabs_all(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], -|v[0:1]|, -|v[2:3]|
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], -|v[4:5]|
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -|v[4:5]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2648,12 +2896,14 @@ define double @v_fmaximum3_f64_fneg0(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], -v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2680,12 +2930,14 @@ define double @v_fmaximum3_f64_fneg1(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], -v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2712,12 +2964,14 @@ define double @v_fmaximum3_f64_fneg2(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], -v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2743,15 +2997,17 @@ define double @v_fmaximum3_f64_const0(double %b, double %c) {
; GFX9-LABEL: v_fmaximum3_f64_const0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0x40200000
-; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], s[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_mov_b32 s0, 0
+; GFX9-NEXT: s_mov_b32 s1, 0x40200000
+; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], s[0:1]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2777,14 +3033,15 @@ define double @v_fmaximum3_f64__const2(double %a, double %b) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0x40200000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_mov_b32 s0, 0
+; GFX9-NEXT: s_mov_b32 s1, 0x40200000
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[4:5]
+; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[0:1]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2810,12 +3067,14 @@ define double @v_fmaximum3_f64_inlineimm0(double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], 4.0
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2841,12 +3100,14 @@ define double @v_fmaximum3_f64__inlineimm(double %a, double %b) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], 4.0
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2871,17 +3132,18 @@ define double @v_fmaximum3_f64_const1_const2(double %a) {
; GFX9-LABEL: v_fmaximum3_f64_const1_const2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0x40200000
-; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_mov_b32 s0, 0
+; GFX9-NEXT: s_mov_b32 s1, 0x40200000
+; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[0:1]
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0x40300000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_mov_b32 s0, 0
+; GFX9-NEXT: s_mov_b32 s1, 0x40300000
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[4:5]
+; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[0:1]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2909,9 +3171,11 @@ define <2 x float> @v_no_fmaximum3_f32__multi_use(float %a, float %b, float %c)
; GFX9-NEXT: v_max_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -2935,11 +3199,14 @@ define amdgpu_ps <2 x i32> @s_no_fmaximum3_f32__multi_use(float inreg %a, float
; GFX9-NEXT: v_max_f32_e32 v1, s0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_max_f32_e32 v1, s2, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_readfirstlane_b32 s1, v1
; GFX9-NEXT: ; return to shader part epilog
%max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -2973,9 +3240,11 @@ define <2 x half> @v_no_fmaximum3_f16__multi_use(half %a, half %b, half %c) {
; GFX9-NEXT: v_max_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_max_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -3002,11 +3271,13 @@ define amdgpu_ps <2 x i32> @s_no_fmaximum3_f16__multi_use(half inreg %a, half in
; GFX9-NEXT: v_max_f16_e32 v1, s0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_max_f16_e32 v1, s2, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s2, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: v_readfirstlane_b32 s1, v1
@@ -3043,19 +3314,23 @@ define <4 x half> @v_no_fmaximum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b,
; GFX9-NEXT: v_pk_max_f16 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v1, v5, s4
+; GFX9-NEXT: v_perm_b32 v0, v1, v5, s0
; GFX9-NEXT: v_pk_max_f16 v3, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-NEXT: v_perm_b32 v1, v1, v5, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v5, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.maximum.f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.maximum.f16(<2 x half> %max0, <2 x half> %c)
@@ -3080,12 +3355,14 @@ define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -3095,3 +3372,6 @@ define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double
%insert.1 = insertelement <2 x double> %insert.0, double %max1, i32 1
ret <2 x double> %insert.1
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX940: {{.*}}
+; GFX950: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
index d9ba2de48bb010..43293512c8c21d 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
define float @v_fminimum3_f32(float %a, float %b, float %c) {
; GFX12-LABEL: v_fminimum3_f32:
@@ -19,9 +20,11 @@ define float @v_fminimum3_f32(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
@@ -46,9 +49,11 @@ define float @v_fminimum3_f32_commute(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v1, v2, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
@@ -71,10 +76,13 @@ define amdgpu_ps i32 @s_fminimum3_f32(float inreg %a, float inreg %b, float inre
; GFX9-NEXT: v_min_f32_e32 v1, s0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_min_f32_e32 v1, s2, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
%max0 = call float @llvm.minimum.f32(float %a, float %b)
@@ -101,9 +109,11 @@ define float @v_fminimum3_f32_fabs0(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e64 v3, |v0|, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
@@ -129,9 +139,11 @@ define float @v_fminimum3_f32_fabs1(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e64 v3, v0, |v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%b.fabs = call float @llvm.fabs.f32(float %b)
@@ -157,9 +169,11 @@ define float @v_fminimum3_f32_fabs2(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v1, v0, |v2|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%c.fabs = call float @llvm.fabs.f32(float %c)
@@ -185,9 +199,11 @@ define float @v_fminimum3_f32_fabs_all(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e64 v3, |v0|, |v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v1, v0, |v2|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
@@ -215,9 +231,11 @@ define float @v_fminimum3_f32_fneg_all(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e64 v3, -v0, -v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v1, v0, -v2
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
@@ -245,9 +263,11 @@ define float @v_fminimum3_f32_fneg_fabs_all(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e64 v3, -|v0|, -|v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v1, v0, -|v2|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
@@ -278,9 +298,11 @@ define float @v_fminimum3_f32_fneg0(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e64 v3, -v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
@@ -306,9 +328,11 @@ define float @v_fminimum3_f32_fneg1(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e64 v3, v0, -v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%b.fneg = fneg float %b
@@ -334,9 +358,11 @@ define float @v_fminimum3_f32_fneg2(float %a, float %b, float %c) {
; GFX9-NEXT: v_min_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v1, v0, -v2
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%c.fneg = fneg float %c
@@ -362,9 +388,11 @@ define float @v_fminimum3_f32_const0(float %b, float %c) {
; GFX9-NEXT: v_min_f32_e32 v2, 0x41000000, v0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float 8.0, float %b)
@@ -389,9 +417,11 @@ define float @v_fminimum3_f32__const2(float %a, float %b) {
; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_min_f32_e32 v1, 0x41000000, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
@@ -416,9 +446,11 @@ define float @v_fminimum3_f32_inlineimm0(float %b, float %c) {
; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float 4.0, float %b)
@@ -443,9 +475,11 @@ define float @v_fminimum3_f32__inlineimm(float %a, float %b) {
; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_min_f32_e32 v1, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
@@ -472,9 +506,11 @@ define float @v_fminimum3_f32_const1_const2(float %a) {
; GFX9-NEXT: v_min_f32_e32 v1, 0x41000000, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_min_f32_e32 v1, 0x41800000, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float 8.0)
@@ -500,15 +536,19 @@ define <2 x float> @v_fminimum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float
; GFX9-NEXT: v_min_f32_e32 v6, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v0, v2
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v2, v4, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX9-NEXT: v_min_f32_e32 v2, v5, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
@@ -534,15 +574,19 @@ define <2 x float> @v_fminimum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2
; GFX9-NEXT: v_min_f32_e32 v6, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v0, v2
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v2, v0, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX9-NEXT: v_min_f32_e32 v2, v1, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
@@ -568,15 +612,19 @@ define <2 x float> @v_fminimum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b,
; GFX9-NEXT: v_min_f32_e64 v6, |v1|, |v3|
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_min_f32_e64 v3, |v0|, |v2|
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v2, v0, |v4|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX9-NEXT: v_min_f32_e64 v2, v1, |v5|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
@@ -605,15 +653,19 @@ define <2 x float> @v_fminimum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b,
; GFX9-NEXT: v_min_f32_e64 v6, -v1, -v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_min_f32_e64 v3, -v0, -v2
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v2, v0, -v4
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
; GFX9-NEXT: v_min_f32_e64 v2, v1, -v5
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <2 x float> %a
@@ -642,15 +694,19 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c
; GFX9-NEXT: v_min_f32_e32 v4, 2.0, v1
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
; GFX9-NEXT: v_min_f32_e32 v4, 2.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_min_f32_e32 v4, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_min_f32_e32 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> <float 2.0, float 2.0>)
@@ -676,15 +732,19 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b
; GFX9-NEXT: v_min_f32_e32 v4, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v0, v2
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
@@ -711,21 +771,27 @@ define <3 x float> @v_fminimum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float
; GFX9-NEXT: v_min_f32_e32 v9, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_min_f32_e32 v5, v1, v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_min_f32_e32 v4, v0, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v6, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v7, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v8, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
@@ -752,21 +818,27 @@ define <3 x float> @v_fminimum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3
; GFX9-NEXT: v_min_f32_e32 v9, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_min_f32_e32 v5, v1, v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_min_f32_e32 v4, v0, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v0, v6
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v6
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v1, v7
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v7
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v2, v8
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v8
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
@@ -793,21 +865,27 @@ define <3 x float> @v_fminimum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b,
; GFX9-NEXT: v_min_f32_e64 v9, |v2|, |v5|
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5|
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_min_f32_e64 v5, |v1|, |v4|
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4|
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_min_f32_e64 v4, |v0|, |v3|
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX9-NEXT: v_min_f32_e64 v3, v0, |v6|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v3, v1, |v7|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v3, v2, |v8|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a)
@@ -837,21 +915,27 @@ define <3 x float> @v_fminimum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b,
; GFX9-NEXT: v_min_f32_e64 v9, -v2, -v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_min_f32_e64 v5, -v1, -v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_min_f32_e64 v4, -v0, -v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
; GFX9-NEXT: v_min_f32_e64 v3, v0, -v6
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v3, v1, -v7
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
; GFX9-NEXT: v_min_f32_e64 v3, v2, -v8
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <3 x float> %a
@@ -881,21 +965,27 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c
; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_min_f32_e32 v6, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v1, v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v3, v2, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> <float 2.0, float 2.0, float 2.0>)
@@ -922,21 +1012,27 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b
; GFX9-NEXT: v_min_f32_e32 v6, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
; GFX9-NEXT: v_min_f32_e32 v5, v1, v4
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
; GFX9-NEXT: v_min_f32_e32 v4, v0, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc
; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
@@ -962,9 +1058,11 @@ define half @v_fminimum3_f16(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half %a, half %b)
@@ -989,9 +1087,11 @@ define half @v_fminimum3_f16_commute(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e32 v1, v2, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half %a, half %b)
@@ -1016,11 +1116,14 @@ define amdgpu_ps i32 @s_fminimum3_f16(half inreg %a, half inreg %b, half inreg %
; GFX9-NEXT: v_min_f16_e32 v1, s0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_min_f16_e32 v1, s2, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s2, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
%max0 = call half @llvm.minimum.f16(half %a, half %b)
@@ -1048,9 +1151,11 @@ define half @v_fminimum3_f16_fabs0(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e64 v3, |v0|, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call half @llvm.fabs.f16(half %a)
@@ -1076,9 +1181,11 @@ define half @v_fminimum3_f16_fabs1(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e64 v3, v0, |v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%b.fabs = call half @llvm.fabs.f16(half %b)
@@ -1104,9 +1211,11 @@ define half @v_fminimum3_f16_fabs2(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e64 v1, v0, |v2|
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%c.fabs = call half @llvm.fabs.f16(half %c)
@@ -1132,9 +1241,11 @@ define half @v_fminimum3_f16_fabs_all(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e64 v3, |v0|, |v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e64 v1, v0, |v2|
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call half @llvm.fabs.f16(half %a)
@@ -1162,9 +1273,11 @@ define half @v_fminimum3_f16_fneg_all(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e64 v3, -v0, -v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e64 v1, v0, -v2
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg half %a
@@ -1192,9 +1305,11 @@ define half @v_fminimum3_f16_fneg_fabs_all(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e64 v3, -|v0|, -|v1|
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -|v0|, -|v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e64 v1, v0, -|v2|
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -|v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call half @llvm.fabs.f16(half %a)
@@ -1225,9 +1340,11 @@ define half @v_fminimum3_f16_fneg0(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e64 v3, -v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg half %a
@@ -1253,9 +1370,11 @@ define half @v_fminimum3_f16_fneg1(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e64 v3, v0, -v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%b.fneg = fneg half %b
@@ -1281,9 +1400,11 @@ define half @v_fminimum3_f16_fneg2(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e64 v1, v0, -v2
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%c.fneg = fneg half %c
@@ -1309,9 +1430,11 @@ define half @v_fminimum3_f16_const0(half %b, half %c) {
; GFX9-NEXT: v_min_f16_e32 v2, 0x4800, v0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_min_f16_e32 v2, v0, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half 8.0, half %b)
@@ -1336,9 +1459,11 @@ define half @v_fminimum3_f16__const2(half %a, half %b) {
; GFX9-NEXT: v_min_f16_e32 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_min_f16_e32 v1, 0x4800, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half %a, half %b)
@@ -1363,9 +1488,11 @@ define half @v_fminimum3_f16_inlineimm0(half %b, half %c) {
; GFX9-NEXT: v_min_f16_e32 v2, 4.0, v0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_min_f16_e32 v2, v0, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half 4.0, half %b)
@@ -1390,9 +1517,11 @@ define half @v_fminimum3_f16__inlineimm(half %a, half %b) {
; GFX9-NEXT: v_min_f16_e32 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: v_min_f16_e32 v1, 4.0, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half %a, half %b)
@@ -1419,9 +1548,11 @@ define half @v_fminimum3_f16_const1_const2(half %a) {
; GFX9-NEXT: v_min_f16_e32 v1, 0x4800, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_min_f16_e32 v1, 0x4c00, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call half @llvm.minimum.f16(half %a, half 8.0)
@@ -1448,19 +1579,23 @@ define <2 x half> @v_fminimum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c
; GFX9-NEXT: v_pk_min_f16 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v0, v5, s4
+; GFX9-NEXT: v_perm_b32 v1, v0, v5, s0
; GFX9-NEXT: v_pk_min_f16 v1, v2, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %c, <2 x half> %max0)
@@ -1486,19 +1621,23 @@ define <2 x half> @v_fminimum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x
; GFX9-NEXT: v_pk_min_f16 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v0, v5, s4
+; GFX9-NEXT: v_perm_b32 v1, v0, v5, s0
; GFX9-NEXT: v_pk_min_f16 v1, v1, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %max0, <2 x half> %c)
@@ -1527,22 +1666,25 @@ define <2 x half> @v_fminimum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v0
; GFX9-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v1
; GFX9-NEXT: v_pk_min_f16 v3, v3, v4
-; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v2
; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v1|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v2
-; GFX9-NEXT: v_perm_b32 v1, v4, v0, s4
+; GFX9-NEXT: v_perm_b32 v1, v4, v0, s0
; GFX9-NEXT: v_pk_min_f16 v1, v1, v5
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v3, v0, s4
+; GFX9-NEXT: v_perm_b32 v0, v3, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a)
%b.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %b)
@@ -1571,19 +1713,23 @@ define <2 x half> @v_fminimum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2
; GFX9-NEXT: v_pk_min_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v0, v5, s4
+; GFX9-NEXT: v_perm_b32 v1, v0, v5, s0
; GFX9-NEXT: v_pk_min_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v5, -v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <2 x half> %a
%b.fneg = fneg <2 x half> %b
@@ -1610,21 +1756,25 @@ define <2 x half> @v_fminimum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_min_f16 v2, v0, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v3, v0, s4
+; GFX9-NEXT: v_perm_b32 v2, v3, v0, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, v1
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v3, v0, s4
+; GFX9-NEXT: v_perm_b32 v0, v3, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 2.0>)
%max1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %max0, <2 x half> %c)
@@ -1650,19 +1800,23 @@ define <2 x half> @v_fminimum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) {
; GFX9-NEXT: v_pk_min_f16 v2, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v0, v4, s4
+; GFX9-NEXT: v_perm_b32 v1, v0, v4, s0
; GFX9-NEXT: v_pk_min_f16 v1, v1, 4.0 op_sel_hi:[1,0]
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v2, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v2, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %max0, <2 x half> <half 4.0, half 4.0>)
@@ -1690,29 +1844,35 @@ define <3 x half> @v_fminimum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c
; GFX9-NEXT: v_pk_min_f16 v6, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_min_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v6, s0
; GFX9-NEXT: v_pk_min_f16 v1, v5, v1
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v6
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_min_f16 v2, v4, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v8
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b)
%max1 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %c, <3 x half> %max0)
@@ -1740,29 +1900,35 @@ define <3 x half> @v_fminimum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x
; GFX9-NEXT: v_pk_min_f16 v6, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_min_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v6, s0
; GFX9-NEXT: v_pk_min_f16 v1, v1, v5
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v5
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, v4
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v8, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b)
%max1 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %max0, <3 x half> %c)
@@ -1799,33 +1965,37 @@ define <3 x half> @v_fminimum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3
; GFX9-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v0
; GFX9-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v2
; GFX9-NEXT: v_pk_min_f16 v7, v7, v9
-; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX9-NEXT: v_mov_b32_e32 v12, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_pk_min_f16 v6, v6, v8
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
; GFX9-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v4
+; GFX9-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v5
; GFX9-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v12, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v4
-; GFX9-NEXT: v_perm_b32 v2, v8, v0, s4
+; GFX9-NEXT: v_perm_b32 v2, v8, v0, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, v11
-; GFX9-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v5
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT: v_perm_b32 v6, v9, v1, s4
+; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX9-NEXT: v_perm_b32 v6, v9, v1, s0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc
; GFX9-NEXT: v_pk_min_f16 v6, v6, v10
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v3, v0, s4
+; GFX9-NEXT: v_perm_b32 v0, v3, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %a)
%b.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %b)
@@ -1856,29 +2026,35 @@ define <3 x half> @v_fminimum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3
; GFX9-NEXT: v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_min_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v6, s0
; GFX9-NEXT: v_pk_min_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <3 x half> %a
%b.fneg = fneg <3 x half> %b
@@ -1907,29 +2083,34 @@ define <3 x half> @v_fminimum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX9-NEXT: v_pk_min_f16 v7, v1, 2.0
+; GFX9-NEXT: s_mov_b32 s1, 0x5040100
+; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v1
+; GFX9-NEXT: s_movk_i32 s0, 0x7e00
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX9-NEXT: s_mov_b32 s5, 0x5040100
-; GFX9-NEXT: v_perm_b32 v4, v5, v0, s5
+; GFX9-NEXT: v_perm_b32 v4, v5, v0, s1
; GFX9-NEXT: v_pk_min_f16 v4, v4, v2
-; GFX9-NEXT: s_movk_i32 s4, 0x7e00
-; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v4
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc
-; GFX9-NEXT: v_pack_b32_f16 v7, v1, s4
+; GFX9-NEXT: v_pack_b32_f16 v7, v1, s0
; GFX9-NEXT: v_pk_min_f16 v7, v7, v3
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX9-NEXT: v_perm_b32 v0, v5, v0, s5
+; GFX9-NEXT: v_perm_b32 v0, v5, v0, s1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> <half 2.0, half 2.0, half 2.0>)
%max1 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %max0, <3 x half> %c)
@@ -1957,29 +2138,35 @@ define <3 x half> @v_fminimum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) {
; GFX9-NEXT: v_pk_min_f16 v4, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_pk_min_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v1, v1, v4, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v4, s0
; GFX9-NEXT: v_pk_min_f16 v1, v1, 4.0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v4
-; GFX9-NEXT: v_perm_b32 v2, v0, v6, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX9-NEXT: v_perm_b32 v2, v0, v6, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v6
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b)
%max1 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %max0, <3 x half> <half 4.0, half 4.0, half 4.0>)
@@ -2007,33 +2194,41 @@ define <4 x half> @v_fminimum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c
; GFX9-NEXT: v_pk_min_f16 v6, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_min_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v2, v1, v6, s0
; GFX9-NEXT: v_pk_min_f16 v2, v5, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v6
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_min_f16 v2, v4, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v8
+; GFX9-NEXT: v_perm_b32 v1, v1, v3, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v5, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v5, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b)
%max1 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %c, <4 x half> %max0)
@@ -2061,33 +2256,41 @@ define <4 x half> @v_fminimum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x
; GFX9-NEXT: v_pk_min_f16 v6, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_min_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v2, v1, v6, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, v5
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, v4
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v8, v4
+; GFX9-NEXT: v_perm_b32 v1, v1, v3, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v5, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v5, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b)
%max1 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %max0, <4 x half> %c)
@@ -2124,37 +2327,43 @@ define <4 x half> @v_fminimum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4
; GFX9-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v1
; GFX9-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v3
; GFX9-NEXT: v_pk_min_f16 v7, v7, v9
-; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX9-NEXT: v_mov_b32_e32 v12, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_pk_min_f16 v6, v6, v8
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
; GFX9-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v5
+; GFX9-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v4
; GFX9-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v5
-; GFX9-NEXT: v_perm_b32 v2, v8, v1, s4
-; GFX9-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v4
+; GFX9-NEXT: v_perm_b32 v2, v8, v1, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, v11
-; GFX9-NEXT: v_perm_b32 v6, v9, v0, s4
+; GFX9-NEXT: v_perm_b32 v6, v9, v0, s0
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT: v_pk_min_f16 v6, v6, v10
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v7, v12, v7, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v12, v2, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4|
+; GFX9-NEXT: v_perm_b32 v1, v3, v1, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc
-; GFX9-NEXT: v_perm_b32 v0, v7, v0, s4
-; GFX9-NEXT: v_perm_b32 v1, v3, v1, s4
+; GFX9-NEXT: v_perm_b32 v0, v7, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
%b.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
@@ -2185,33 +2394,41 @@ define <4 x half> @v_fminimum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4
; GFX9-NEXT: v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_pk_min_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v1, v6, s4
+; GFX9-NEXT: v_perm_b32 v2, v1, v6, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v2, v0, v8, s4
+; GFX9-NEXT: v_perm_b32 v2, v0, v8, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4
+; GFX9-NEXT: v_perm_b32 v1, v1, v3, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v5, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v5, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <4 x half> %a
%b.fneg = fneg <4 x half> %b
@@ -2240,35 +2457,41 @@ define <4 x half> @v_fminimum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7e00
+; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_pk_min_f16 v7, v1, 2.0 op_sel_hi:[1,0]
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v7
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v8, v6, v8, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v4, v8, v1, s4
+; GFX9-NEXT: v_perm_b32 v4, v8, v1, s0
; GFX9-NEXT: v_pk_min_f16 v4, v4, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT: v_perm_b32 v8, v5, v0, s4
+; GFX9-NEXT: v_perm_b32 v8, v5, v0, s0
; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v4
; GFX9-NEXT: v_pk_min_f16 v8, v8, v2
; GFX9-NEXT: v_cndmask_b32_e32 v7, v6, v7, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v8
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v9, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: v_perm_b32 v1, v7, v1, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc
-; GFX9-NEXT: v_perm_b32 v0, v5, v0, s4
-; GFX9-NEXT: v_perm_b32 v1, v7, v1, s4
+; GFX9-NEXT: v_perm_b32 v0, v5, v0, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> <half 2.0, half 2.0, half 2.0, half 2.0>)
%max1 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %max0, <4 x half> %c)
@@ -2296,33 +2519,41 @@ define <4 x half> @v_fminimum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) {
; GFX9-NEXT: v_pk_min_f16 v4, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_pk_min_f16 v2, v1, v3
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v2, v1, v4, s4
+; GFX9-NEXT: v_perm_b32 v2, v1, v4, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v4
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: v_perm_b32 v2, v0, v6, s4
+; GFX9-NEXT: v_perm_b32 v2, v0, v6, s0
; GFX9-NEXT: v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v6
+; GFX9-NEXT: v_perm_b32 v1, v1, v3, s0
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v3, s4
+; GFX9-NEXT: v_perm_b32 v0, v0, v4, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b)
%max1 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %max0, <4 x half> <half 4.0, half 4.0, half 4.0, half 4.0>)
@@ -2346,12 +2577,14 @@ define double @v_fminimum3_f64(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2377,12 +2610,14 @@ define double @v_fminimum3_f64_commute(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[4:5], v[0:1]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2404,19 +2639,20 @@ define amdgpu_ps <2 x i32> @s_fminimum3_f64(double inreg %a, double inreg %b, do
;
; GFX9-LABEL: s_fminimum3_f64:
; GFX9: ; %bb.0:
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
; GFX9-NEXT: v_min_f64 v[2:3], s[0:1], v[0:1]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[4:5], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc
-; GFX9-NEXT: v_readfirstlane_b32 s0, v1
; GFX9-NEXT: v_readfirstlane_b32 s1, v0
+; GFX9-NEXT: v_readfirstlane_b32 s0, v1
; GFX9-NEXT: ; return to shader part epilog
%max0 = call double @llvm.minimum.f64(double %a, double %b)
%max1 = call double @llvm.minimum.f64(double %max0, double %c)
@@ -2447,12 +2683,14 @@ define double @v_fminimum3_f64_fabs0(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], |v[0:1]|, v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2479,12 +2717,14 @@ define double @v_fminimum3_f64_fabs1(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], |v[2:3]|
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2511,12 +2751,14 @@ define double @v_fminimum3_f64_fabs2(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], |v[4:5]|
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2543,12 +2785,14 @@ define double @v_fminimum3_f64_fabs_all(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], |v[0:1]|, |v[2:3]|
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], |v[4:5]|
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2577,12 +2821,14 @@ define double @v_fminimum3_f64_fneg_all(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], -v[0:1], -v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], -v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2611,12 +2857,14 @@ define double @v_fminimum3_f64_fneg_fabs_all(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], -|v[0:1]|, -|v[2:3]|
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], -|v[4:5]|
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -|v[4:5]|
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2648,12 +2896,14 @@ define double @v_fminimum3_f64_fneg0(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], -v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2680,12 +2930,14 @@ define double @v_fminimum3_f64_fneg1(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], -v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2712,12 +2964,14 @@ define double @v_fminimum3_f64_fneg2(double %a, double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], -v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2743,15 +2997,17 @@ define double @v_fminimum3_f64_const0(double %b, double %c) {
; GFX9-LABEL: v_fminimum3_f64_const0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0x40200000
-; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], s[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_mov_b32 s0, 0
+; GFX9-NEXT: s_mov_b32 s1, 0x40200000
+; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], s[0:1]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2777,14 +3033,15 @@ define double @v_fminimum3_f64__const2(double %a, double %b) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0x40200000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_mov_b32 s0, 0
+; GFX9-NEXT: s_mov_b32 s1, 0x40200000
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[4:5]
+; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[0:1]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2810,12 +3067,14 @@ define double @v_fminimum3_f64_inlineimm0(double %b, double %c) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], 4.0
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2841,12 +3100,14 @@ define double @v_fminimum3_f64__inlineimm(double %a, double %b) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], 4.0
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2871,17 +3132,18 @@ define double @v_fminimum3_f64_const1_const2(double %a) {
; GFX9-LABEL: v_fminimum3_f64_const1_const2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0x40200000
-; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_mov_b32 s0, 0
+; GFX9-NEXT: s_mov_b32 s1, 0x40200000
+; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[0:1]
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0x40300000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_mov_b32 s0, 0
+; GFX9-NEXT: s_mov_b32 s1, 0x40300000
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[4:5]
+; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[0:1]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2909,9 +3171,11 @@ define <2 x float> @v_no_fminimum3_f32__multi_use(float %a, float %b, float %c)
; GFX9-NEXT: v_min_f32_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
@@ -2935,11 +3199,14 @@ define amdgpu_ps <2 x i32> @s_no_fminimum3_f32__multi_use(float inreg %a, float
; GFX9-NEXT: v_min_f32_e32 v1, s0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_min_f32_e32 v1, s2, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_readfirstlane_b32 s1, v1
; GFX9-NEXT: ; return to shader part epilog
%max0 = call float @llvm.minimum.f32(float %a, float %b)
@@ -2973,9 +3240,11 @@ define <2 x half> @v_no_fminimum3_f16__multi_use(half %a, half %b, half %c) {
; GFX9-NEXT: v_min_f16_e32 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
; GFX9-NEXT: v_min_f16_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -3002,11 +3271,13 @@ define amdgpu_ps <2 x i32> @s_no_fminimum3_f16__multi_use(half inreg %a, half in
; GFX9-NEXT: v_min_f16_e32 v1, s0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: v_min_f16_e32 v1, s2, v0
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s2, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: v_readfirstlane_b32 s1, v1
@@ -3043,19 +3314,23 @@ define <4 x half> @v_no_fminimum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b,
; GFX9-NEXT: v_pk_min_f16 v3, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX9-NEXT: s_mov_b32 s0, 0x5040100
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v1, v5, s4
+; GFX9-NEXT: v_perm_b32 v0, v1, v5, s0
; GFX9-NEXT: v_pk_min_f16 v3, v0, v2
; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v2
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-NEXT: v_perm_b32 v1, v1, v5, s4
+; GFX9-NEXT: v_perm_b32 v1, v1, v5, s0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x half> @llvm.minimum.f16(<2 x half> %a, <2 x half> %b)
%max1 = call <2 x half> @llvm.minimum.f16(<2 x half> %max0, <2 x half> %c)
@@ -3080,12 +3355,14 @@ define <2 x double> @v_no_fminimum3_f64__multi_use(double %a, double %b, double
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -3095,3 +3372,6 @@ define <2 x double> @v_no_fminimum3_f64__multi_use(double %a, double %b, double
%insert.1 = insertelement <2 x double> %insert.0, double %max1, i32 1
ret <2 x double> %insert.1
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX940: {{.*}}
+; GFX950: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
index 8313f5b655efba..bd35ee3f009736 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
index d90c4a75ac5dea..e782f53cee6087 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
@@ -30,24 +30,24 @@ define half @v_maximum_f16(half %src0, half %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f16_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f16:
; GFX10: ; %bb.0:
@@ -102,12 +102,6 @@ define half @v_maximum_f16__nnan(half %src0, half %src1) {
; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_f16__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -156,24 +150,24 @@ define half @v_maximum_f16__nsz(half %src0, half %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f16__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f16__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f16_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f16__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f16__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f16__nsz:
; GFX10: ; %bb.0:
@@ -228,12 +222,6 @@ define half @v_maximum_f16__nnan_nsz(half %src0, half %src1) {
; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_f16__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_f16__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -284,26 +272,26 @@ define half @v_maximum_f16__nnan_src0(half %arg0, half %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f16__nnan_src0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f16_e32 v0, 1.0, v0
-; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f16__nnan_src0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f16_e32 v0, 1.0, v0
-; GFX940-NEXT: v_max_f16_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f16__nnan_src0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f16_e32 v0, 1.0, v0
+; GFX900-NEXT: v_max_f16_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f16__nnan_src0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f16_e32 v0, 1.0, v0
+; GFX950-NEXT: v_max_f16_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f16__nnan_src0:
; GFX10: ; %bb.0:
@@ -365,26 +353,26 @@ define half @v_maximum_f16__nnan_src1(half %src0, half %arg1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f16__nnan_src1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f16_e32 v1, 1.0, v1
-; GFX9-NEXT: v_max_f16_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f16__nnan_src1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f16_e32 v1, 1.0, v1
-; GFX940-NEXT: v_max_f16_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f16__nnan_src1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f16_e32 v1, 1.0, v1
+; GFX900-NEXT: v_max_f16_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f16__nnan_src1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f16_e32 v1, 1.0, v1
+; GFX950-NEXT: v_max_f16_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f16__nnan_src1:
; GFX10: ; %bb.0:
@@ -453,34 +441,34 @@ define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_maximum_f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s17
-; GFX9-NEXT: v_max_f16_e32 v1, s16, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v0
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_maximum_f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s1
-; GFX940-NEXT: v_max_f16_e32 v1, s0, v0
-; GFX940-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v0
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_maximum_f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s17
+; GFX900-NEXT: v_max_f16_e32 v1, s16, v0
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v0
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_maximum_f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v0, s1
+; GFX950-NEXT: v_max_f16_e32 v1, s0, v0
+; GFX950-NEXT: v_mov_b32_e32 v2, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX950-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v0
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_maximum_f16:
; GFX10: ; %bb.0:
@@ -567,35 +555,35 @@ define <2 x half> @v_maximum_v2f16(<2 x half> %src0, <2 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v2f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v2f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f16:
; GFX10: ; %bb.0:
@@ -668,12 +656,6 @@ define <2 x half> @v_maximum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v2f16__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v2f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -736,35 +718,35 @@ define <2 x half> @v_maximum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v2f16__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v2f16__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f16__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f16__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f16__nsz:
; GFX10: ; %bb.0:
@@ -837,12 +819,6 @@ define <2 x half> @v_maximum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1)
; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v2f16__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v2f16__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -917,50 +893,50 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_maximum_v2f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s17
-; GFX9-NEXT: v_mov_b32_e32 v1, s17
-; GFX9-NEXT: s_lshr_b32 s4, s17, 16
-; GFX9-NEXT: v_pk_max_f16 v1, s16, v1
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
-; GFX9-NEXT: s_lshr_b32 s5, s16, 16
-; GFX9-NEXT: v_mov_b32_e32 v3, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s5, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v0
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_maximum_v2f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s1
-; GFX940-NEXT: v_mov_b32_e32 v1, s1
-; GFX940-NEXT: s_lshr_b32 s1, s1, 16
-; GFX940-NEXT: v_pk_max_f16 v1, s0, v1
-; GFX940-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
-; GFX940-NEXT: s_lshr_b32 s0, s0, 16
-; GFX940-NEXT: v_mov_b32_e32 v3, s1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, s0, v3
-; GFX940-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v0
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_maximum_v2f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s17
+; GFX900-NEXT: v_mov_b32_e32 v1, s17
+; GFX900-NEXT: s_lshr_b32 s4, s17, 16
+; GFX900-NEXT: v_pk_max_f16 v1, s16, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
+; GFX900-NEXT: s_lshr_b32 s5, s16, 16
+; GFX900-NEXT: v_mov_b32_e32 v3, s4
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s5, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v0
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_maximum_v2f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v0, s1
+; GFX950-NEXT: v_mov_b32_e32 v1, s1
+; GFX950-NEXT: s_lshr_b32 s1, s1, 16
+; GFX950-NEXT: v_pk_max_f16 v1, s0, v1
+; GFX950-NEXT: v_mov_b32_e32 v2, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
+; GFX950-NEXT: s_lshr_b32 s0, s0, 16
+; GFX950-NEXT: v_mov_b32_e32 v3, s1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, s0, v3
+; GFX950-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX950-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v0
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_maximum_v2f16:
; GFX10: ; %bb.0:
@@ -1065,41 +1041,41 @@ define <3 x half> @v_maximum_v3f16(<3 x half> %src0, <3 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v3f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v4, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_pk_max_f16 v3, v0, v2
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v3f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v4, v1, v3
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT: v_pk_max_f16 v3, v0, v2
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v4, v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX900-NEXT: v_pk_max_f16 v3, v0, v2
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v4, v1, v3
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX950-NEXT: v_pk_max_f16 v3, v0, v2
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f16:
; GFX10: ; %bb.0:
@@ -1187,13 +1163,6 @@ define <3 x half> @v_maximum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v3f16__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX940-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v3f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1269,41 +1238,41 @@ define <3 x half> @v_maximum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v3f16__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v4, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_pk_max_f16 v3, v0, v2
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v3f16__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v4, v1, v3
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT: v_pk_max_f16 v3, v0, v2
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f16__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v4, v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX900-NEXT: v_pk_max_f16 v3, v0, v2
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f16__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v4, v1, v3
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX950-NEXT: v_pk_max_f16 v3, v0, v2
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f16__nsz:
; GFX10: ; %bb.0:
@@ -1391,13 +1360,6 @@ define <3 x half> @v_maximum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1)
; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v3f16__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX940-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v3f16__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1487,51 +1449,51 @@ define <4 x half> @v_maximum_v4f16(<4 x half> %src0, <4 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v4f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v4, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_pk_max_f16 v3, v0, v2
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v4f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v4, v1, v3
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v3, v0, v2
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v4, v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX900-NEXT: v_pk_max_f16 v3, v0, v2
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v4, v1, v3
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v3, v0, v2
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX950-NEXT: v_perm_b32 v1, v1, v6, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f16:
; GFX10: ; %bb.0:
@@ -1635,13 +1597,6 @@ define <4 x half> @v_maximum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) {
; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v4f16__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX940-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v4f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1731,51 +1686,51 @@ define <4 x half> @v_maximum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v4f16__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v4, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_pk_max_f16 v3, v0, v2
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v4f16__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v4, v1, v3
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v3, v0, v2
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f16__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v4, v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX900-NEXT: v_pk_max_f16 v3, v0, v2
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f16__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v4, v1, v3
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v3, v0, v2
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX950-NEXT: v_perm_b32 v1, v1, v6, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f16__nsz:
; GFX10: ; %bb.0:
@@ -1879,13 +1834,6 @@ define <4 x half> @v_maximum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1)
; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v4f16__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX940-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v4f16__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2023,83 +1971,83 @@ define <8 x half> @v_maximum_v8f16(<8 x half> %src0, <8 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v8f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v8, v3, v7
-; GFX9-NEXT: v_mov_b32_e32 v9, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v8
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
-; GFX9-NEXT: v_pk_max_f16 v7, v2, v6
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v7
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
-; GFX9-NEXT: v_pk_max_f16 v6, v1, v5
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
-; GFX9-NEXT: v_pk_max_f16 v5, v0, v4
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v5
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v6, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v7, s4
-; GFX9-NEXT: v_perm_b32 v2, v2, v8, s4
-; GFX9-NEXT: v_perm_b32 v3, v3, v10, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v8f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v8, v3, v7
-; GFX940-NEXT: v_mov_b32_e32 v9, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v8, 16, v8
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v7, v2, v6
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
-; GFX940-NEXT: v_perm_b32 v3, v3, v10, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v7, 16, v7
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v6, v1, v5
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
-; GFX940-NEXT: v_perm_b32 v2, v2, v8, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v5, v0, v4
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
-; GFX940-NEXT: v_perm_b32 v1, v1, v7, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v5, 16, v5
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v6, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v8f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v8, v3, v7
+; GFX900-NEXT: v_mov_b32_e32 v9, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
+; GFX900-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v8, 16, v8
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
+; GFX900-NEXT: v_pk_max_f16 v7, v2, v6
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
+; GFX900-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v7, 16, v7
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
+; GFX900-NEXT: v_pk_max_f16 v6, v1, v5
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v6, 16, v6
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
+; GFX900-NEXT: v_pk_max_f16 v5, v0, v4
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v5, 16, v5
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v6, s4
+; GFX900-NEXT: v_perm_b32 v1, v1, v7, s4
+; GFX900-NEXT: v_perm_b32 v2, v2, v8, s4
+; GFX900-NEXT: v_perm_b32 v3, v3, v10, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v8f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v8, v3, v7
+; GFX950-NEXT: v_mov_b32_e32 v9, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v8, 16, v8
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v7, v2, v6
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
+; GFX950-NEXT: v_perm_b32 v3, v3, v10, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v7, 16, v7
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v6, v1, v5
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
+; GFX950-NEXT: v_perm_b32 v2, v2, v8, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v6, 16, v6
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v5, v0, v4
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
+; GFX950-NEXT: v_perm_b32 v1, v1, v7, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v5, 16, v5
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v6, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v8f16:
; GFX10: ; %bb.0:
@@ -2400,147 +2348,147 @@ define <16 x half> @v_maximum_v16f16(<16 x half> %src0, <16 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v16f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v16, v7, v15
-; GFX9-NEXT: v_mov_b32_e32 v17, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v16
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
-; GFX9-NEXT: v_pk_max_f16 v15, v6, v14
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v15, 16, v15
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
-; GFX9-NEXT: v_pk_max_f16 v14, v5, v13
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v14
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
-; GFX9-NEXT: v_pk_max_f16 v13, v4, v12
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v13, 16, v13
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
-; GFX9-NEXT: v_pk_max_f16 v12, v3, v11
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v12, 16, v12
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
-; GFX9-NEXT: v_pk_max_f16 v11, v2, v10
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v11, 16, v11
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
-; GFX9-NEXT: v_pk_max_f16 v10, v1, v9
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v10
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
-; GFX9-NEXT: v_pk_max_f16 v9, v0, v8
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v9
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v10, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v11, s4
-; GFX9-NEXT: v_perm_b32 v2, v2, v12, s4
-; GFX9-NEXT: v_perm_b32 v3, v3, v13, s4
-; GFX9-NEXT: v_perm_b32 v4, v4, v14, s4
-; GFX9-NEXT: v_perm_b32 v5, v5, v15, s4
-; GFX9-NEXT: v_perm_b32 v6, v6, v16, s4
-; GFX9-NEXT: v_perm_b32 v7, v7, v18, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v16f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_max_f16 v16, v7, v15
-; GFX940-NEXT: v_mov_b32_e32 v17, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v16, 16, v16
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v15, v6, v14
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
-; GFX940-NEXT: v_perm_b32 v7, v7, v18, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v15, 16, v15
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v14, v5, v13
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
-; GFX940-NEXT: v_perm_b32 v6, v6, v16, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v14, 16, v14
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v13, v4, v12
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
-; GFX940-NEXT: v_perm_b32 v5, v5, v15, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v13, 16, v13
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v12, v3, v11
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
-; GFX940-NEXT: v_perm_b32 v4, v4, v14, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v12, 16, v12
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v11, v2, v10
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
-; GFX940-NEXT: v_perm_b32 v3, v3, v13, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v11, 16, v11
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v10, v1, v9
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
-; GFX940-NEXT: v_perm_b32 v2, v2, v12, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v10, 16, v10
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_max_f16 v9, v0, v8
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
-; GFX940-NEXT: v_perm_b32 v1, v1, v11, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v9, 16, v9
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v10, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v16f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v16, v7, v15
+; GFX900-NEXT: v_mov_b32_e32 v17, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
+; GFX900-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v16, 16, v16
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
+; GFX900-NEXT: v_pk_max_f16 v15, v6, v14
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
+; GFX900-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v15, 16, v15
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
+; GFX900-NEXT: v_pk_max_f16 v14, v5, v13
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
+; GFX900-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v14, 16, v14
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
+; GFX900-NEXT: v_pk_max_f16 v13, v4, v12
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
+; GFX900-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v13, 16, v13
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
+; GFX900-NEXT: v_pk_max_f16 v12, v3, v11
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
+; GFX900-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v12, 16, v12
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
+; GFX900-NEXT: v_pk_max_f16 v11, v2, v10
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
+; GFX900-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v11, 16, v11
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
+; GFX900-NEXT: v_pk_max_f16 v10, v1, v9
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
+; GFX900-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v10, 16, v10
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
+; GFX900-NEXT: v_pk_max_f16 v9, v0, v8
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
+; GFX900-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v9, 16, v9
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v10, s4
+; GFX900-NEXT: v_perm_b32 v1, v1, v11, s4
+; GFX900-NEXT: v_perm_b32 v2, v2, v12, s4
+; GFX900-NEXT: v_perm_b32 v3, v3, v13, s4
+; GFX900-NEXT: v_perm_b32 v4, v4, v14, s4
+; GFX900-NEXT: v_perm_b32 v5, v5, v15, s4
+; GFX900-NEXT: v_perm_b32 v6, v6, v16, s4
+; GFX900-NEXT: v_perm_b32 v7, v7, v18, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v16f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v16, v7, v15
+; GFX950-NEXT: v_mov_b32_e32 v17, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v16, 16, v16
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v15, v6, v14
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
+; GFX950-NEXT: v_perm_b32 v7, v7, v18, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v15, 16, v15
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v14, v5, v13
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
+; GFX950-NEXT: v_perm_b32 v6, v6, v16, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v14, 16, v14
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v13, v4, v12
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
+; GFX950-NEXT: v_perm_b32 v5, v5, v15, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v13, 16, v13
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v12, v3, v11
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
+; GFX950-NEXT: v_perm_b32 v4, v4, v14, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v12, 16, v12
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v11, v2, v10
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
+; GFX950-NEXT: v_perm_b32 v3, v3, v13, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v11, 16, v11
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v10, v1, v9
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
+; GFX950-NEXT: v_perm_b32 v2, v2, v12, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v10, 16, v10
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_max_f16 v9, v0, v8
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
+; GFX950-NEXT: v_perm_b32 v1, v1, v11, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v9, 16, v9
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v10, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v16f16:
; GFX10: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
index 48851cb030233d..c1fdfa2c4cf9ab 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
@@ -2,7 +2,8 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
@@ -26,24 +27,24 @@ define float @v_maximum_f32(float %src0, float %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f32:
; GFX10: ; %bb.0:
@@ -94,12 +95,6 @@ define float @v_maximum_f32__nnan(float %src0, float %src1) {
; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_f32__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -144,24 +139,24 @@ define float @v_maximum_f32__nsz(float %src0, float %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f32__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f32__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f32__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f32__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f32__nsz:
; GFX10: ; %bb.0:
@@ -212,12 +207,6 @@ define float @v_maximum_f32__nnan_nsz(float %src0, float %src1) {
; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_f32__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -264,26 +253,26 @@ define float @v_maximum_f32__nnan_src0(float %arg0, float %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f32__nnan_src0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f32_e32 v0, 1.0, v0
-; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f32__nnan_src0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f32_e32 v0, 1.0, v0
-; GFX940-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f32__nnan_src0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX900-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f32__nnan_src0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX950-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f32__nnan_src0:
; GFX10: ; %bb.0:
@@ -341,26 +330,26 @@ define float @v_maximum_f32__nnan_src1(float %src0, float %arg1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f32__nnan_src1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1
-; GFX9-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f32__nnan_src1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f32_e32 v1, 1.0, v1
-; GFX940-NEXT: v_max_f32_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f32__nnan_src1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f32_e32 v1, 1.0, v1
+; GFX900-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f32__nnan_src1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f32_e32 v1, 1.0, v1
+; GFX950-NEXT: v_max_f32_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f32__nnan_src1:
; GFX10: ; %bb.0:
@@ -424,32 +413,32 @@ define void @s_maximum_f32(float inreg %src0, float inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_maximum_f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s17
-; GFX9-NEXT: v_max_f32_e32 v1, s16, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v0
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_maximum_f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s1
-; GFX940-NEXT: v_max_f32_e32 v1, s0, v0
-; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v0
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_maximum_f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s17
+; GFX900-NEXT: v_max_f32_e32 v1, s16, v0
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v0
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_maximum_f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v0, s1
+; GFX950-NEXT: v_max_f32_e32 v1, s0, v0
+; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v0
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_maximum_f32:
; GFX10: ; %bb.0:
@@ -517,31 +506,31 @@ define <2 x float> @v_maximum_v2f32(<2 x float> %src0, <2 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v2f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v4, v0, v2
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_max_f32_e32 v2, v1, v3
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v2f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v4, v0, v2
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT: v_max_f32_e32 v2, v1, v3
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v4, v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX900-NEXT: v_max_f32_e32 v2, v1, v3
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v4, v0, v2
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX950-NEXT: v_max_f32_e32 v2, v1, v3
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f32:
; GFX10: ; %bb.0:
@@ -601,13 +590,6 @@ define <2 x float> @v_maximum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1)
; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v2f32__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX940-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v2f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -660,31 +642,31 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v2f32__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v4, v0, v2
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_max_f32_e32 v2, v1, v3
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v2f32__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v4, v0, v2
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT: v_max_f32_e32 v2, v1, v3
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f32__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v4, v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX900-NEXT: v_max_f32_e32 v2, v1, v3
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f32__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v4, v0, v2
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX950-NEXT: v_max_f32_e32 v2, v1, v3
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f32__nsz:
; GFX10: ; %bb.0:
@@ -744,13 +726,6 @@ define <2 x float> @v_maximum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr
; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v2f32__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX940-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v2f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -813,40 +788,40 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_maximum_v2f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s19
-; GFX9-NEXT: v_max_f32_e32 v1, s17, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s17, v0
-; GFX9-NEXT: v_mov_b32_e32 v0, s18
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, s16, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v[0:1]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_maximum_v2f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s3
-; GFX940-NEXT: v_max_f32_e32 v1, s1, v0
-; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s1, v0
-; GFX940-NEXT: v_mov_b32_e32 v0, s2
-; GFX940-NEXT: v_max_f32_e32 v3, s0, v0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v[0:1]
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_maximum_v2f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s19
+; GFX900-NEXT: v_max_f32_e32 v1, s17, v0
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, s17, v0
+; GFX900-NEXT: v_mov_b32_e32 v0, s18
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX900-NEXT: v_max_f32_e32 v3, s16, v0
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v[0:1]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_maximum_v2f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v0, s3
+; GFX950-NEXT: v_max_f32_e32 v1, s1, v0
+; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s1, v0
+; GFX950-NEXT: v_mov_b32_e32 v0, s2
+; GFX950-NEXT: v_max_f32_e32 v3, s0, v0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v[0:1]
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_maximum_v2f32:
; GFX10: ; %bb.0:
@@ -927,38 +902,38 @@ define <3 x float> @v_maximum_v3f32(<3 x float> %src0, <3 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v3f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v6, v0, v3
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, v1, v4
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, v2, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v3f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v6, v0, v3
-; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT: v_max_f32_e32 v3, v1, v4
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX940-NEXT: v_max_f32_e32 v3, v2, v5
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v6, v0, v3
+; GFX900-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX900-NEXT: v_max_f32_e32 v3, v1, v4
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX900-NEXT: v_max_f32_e32 v3, v2, v5
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v6, v0, v3
+; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX950-NEXT: v_max_f32_e32 v3, v1, v4
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX950-NEXT: v_max_f32_e32 v3, v2, v5
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f32:
; GFX10: ; %bb.0:
@@ -1028,14 +1003,6 @@ define <3 x float> @v_maximum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1)
; GFX9-NEXT: v_max_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v3f32__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX940-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX940-NEXT: v_max_f32_e32 v2, v2, v5
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v3f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1097,38 +1064,38 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v3f32__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v6, v0, v3
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, v1, v4
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v3, v2, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v3f32__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v6, v0, v3
-; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT: v_max_f32_e32 v3, v1, v4
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX940-NEXT: v_max_f32_e32 v3, v2, v5
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f32__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v6, v0, v3
+; GFX900-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX900-NEXT: v_max_f32_e32 v3, v1, v4
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX900-NEXT: v_max_f32_e32 v3, v2, v5
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f32__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v6, v0, v3
+; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX950-NEXT: v_max_f32_e32 v3, v1, v4
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX950-NEXT: v_max_f32_e32 v3, v2, v5
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f32__nsz:
; GFX10: ; %bb.0:
@@ -1198,14 +1165,6 @@ define <3 x float> @v_maximum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr
; GFX9-NEXT: v_max_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v3f32__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX940-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX940-NEXT: v_max_f32_e32 v2, v2, v5
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v3f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1273,45 +1232,45 @@ define <4 x float> @v_maximum_v4f32(<4 x float> %src0, <4 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v4f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v8, v0, v4
-; GFX9-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX9-NEXT: v_max_f32_e32 v4, v1, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX9-NEXT: v_max_f32_e32 v4, v2, v6
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX9-NEXT: v_max_f32_e32 v4, v3, v7
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v4f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v8, v0, v4
-; GFX940-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX940-NEXT: v_max_f32_e32 v4, v1, v5
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX940-NEXT: v_max_f32_e32 v4, v2, v6
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX940-NEXT: v_max_f32_e32 v4, v3, v7
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v8, v0, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX900-NEXT: v_max_f32_e32 v4, v1, v5
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX900-NEXT: v_max_f32_e32 v4, v2, v6
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX900-NEXT: v_max_f32_e32 v4, v3, v7
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v8, v0, v4
+; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX950-NEXT: v_max_f32_e32 v4, v1, v5
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX950-NEXT: v_max_f32_e32 v4, v2, v6
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX950-NEXT: v_max_f32_e32 v4, v3, v7
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f32:
; GFX10: ; %bb.0:
@@ -1391,15 +1350,6 @@ define <4 x float> @v_maximum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1)
; GFX9-NEXT: v_max_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v4f32__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX940-NEXT: v_max_f32_e32 v1, v1, v5
-; GFX940-NEXT: v_max_f32_e32 v2, v2, v6
-; GFX940-NEXT: v_max_f32_e32 v3, v3, v7
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v4f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1469,45 +1419,45 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v4f32__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v8, v0, v4
-; GFX9-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX9-NEXT: v_max_f32_e32 v4, v1, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX9-NEXT: v_max_f32_e32 v4, v2, v6
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX9-NEXT: v_max_f32_e32 v4, v3, v7
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v4f32__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v8, v0, v4
-; GFX940-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX940-NEXT: v_max_f32_e32 v4, v1, v5
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX940-NEXT: v_max_f32_e32 v4, v2, v6
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX940-NEXT: v_max_f32_e32 v4, v3, v7
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f32__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v8, v0, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX900-NEXT: v_max_f32_e32 v4, v1, v5
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX900-NEXT: v_max_f32_e32 v4, v2, v6
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX900-NEXT: v_max_f32_e32 v4, v3, v7
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f32__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v8, v0, v4
+; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX950-NEXT: v_max_f32_e32 v4, v1, v5
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX950-NEXT: v_max_f32_e32 v4, v2, v6
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX950-NEXT: v_max_f32_e32 v4, v3, v7
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f32__nsz:
; GFX10: ; %bb.0:
@@ -1587,15 +1537,6 @@ define <4 x float> @v_maximum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr
; GFX9-NEXT: v_max_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v4f32__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX940-NEXT: v_max_f32_e32 v1, v1, v5
-; GFX940-NEXT: v_max_f32_e32 v2, v2, v6
-; GFX940-NEXT: v_max_f32_e32 v3, v3, v7
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v4f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1689,73 +1630,73 @@ define <8 x float> @v_maximum_v8f32(<8 x float> %src0, <8 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v8f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v16, v0, v8
-; GFX9-NEXT: v_mov_b32_e32 v17, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX9-NEXT: v_max_f32_e32 v8, v1, v9
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
-; GFX9-NEXT: v_max_f32_e32 v8, v2, v10
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
-; GFX9-NEXT: v_max_f32_e32 v8, v3, v11
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
-; GFX9-NEXT: v_max_f32_e32 v8, v4, v12
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
-; GFX9-NEXT: v_max_f32_e32 v8, v5, v13
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
-; GFX9-NEXT: v_max_f32_e32 v8, v6, v14
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
-; GFX9-NEXT: v_max_f32_e32 v8, v7, v15
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v8f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v16, v0, v8
-; GFX940-NEXT: v_mov_b32_e32 v17, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX940-NEXT: v_max_f32_e32 v8, v1, v9
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
-; GFX940-NEXT: v_max_f32_e32 v8, v2, v10
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
-; GFX940-NEXT: v_max_f32_e32 v8, v3, v11
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
-; GFX940-NEXT: v_max_f32_e32 v8, v4, v12
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
-; GFX940-NEXT: v_max_f32_e32 v8, v5, v13
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
-; GFX940-NEXT: v_max_f32_e32 v8, v6, v14
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
-; GFX940-NEXT: v_max_f32_e32 v8, v7, v15
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v8f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v16, v0, v8
+; GFX900-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX900-NEXT: v_max_f32_e32 v8, v1, v9
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX900-NEXT: v_max_f32_e32 v8, v2, v10
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX900-NEXT: v_max_f32_e32 v8, v3, v11
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX900-NEXT: v_max_f32_e32 v8, v4, v12
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX900-NEXT: v_max_f32_e32 v8, v5, v13
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
+; GFX900-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX900-NEXT: v_max_f32_e32 v8, v6, v14
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX900-NEXT: v_max_f32_e32 v8, v7, v15
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
+; GFX900-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v8f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v16, v0, v8
+; GFX950-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
+; GFX950-NEXT: v_max_f32_e32 v8, v1, v9
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX950-NEXT: v_max_f32_e32 v8, v2, v10
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX950-NEXT: v_max_f32_e32 v8, v3, v11
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX950-NEXT: v_max_f32_e32 v8, v4, v12
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX950-NEXT: v_max_f32_e32 v8, v5, v13
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX950-NEXT: v_max_f32_e32 v8, v6, v14
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX950-NEXT: v_max_f32_e32 v8, v7, v15
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v8f32:
; GFX10: ; %bb.0:
@@ -1968,136 +1909,136 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v16f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v16
-; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32
-; GFX9-NEXT: v_writelane_b32 v31, s30, 0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v17
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v18
-; GFX9-NEXT: v_mov_b32_e32 v17, 0x7fc00000
-; GFX9-NEXT: v_max_f32_e32 v18, v13, v29
-; GFX9-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
-; GFX9-NEXT: v_writelane_b32 v31, s31, 1
-; GFX9-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
-; GFX9-NEXT: v_max_f32_e32 v3, v3, v19
-; GFX9-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
-; GFX9-NEXT: v_max_f32_e32 v4, v4, v20
-; GFX9-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
-; GFX9-NEXT: v_max_f32_e32 v5, v5, v21
-; GFX9-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
-; GFX9-NEXT: v_max_f32_e32 v6, v6, v22
-; GFX9-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
-; GFX9-NEXT: v_max_f32_e32 v7, v7, v23
-; GFX9-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
-; GFX9-NEXT: v_max_f32_e32 v8, v8, v24
-; GFX9-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
-; GFX9-NEXT: v_max_f32_e32 v9, v9, v25
-; GFX9-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
-; GFX9-NEXT: v_max_f32_e32 v10, v10, v26
-; GFX9-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
-; GFX9-NEXT: v_max_f32_e32 v11, v11, v27
-; GFX9-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
-; GFX9-NEXT: v_max_f32_e32 v12, v12, v28
-; GFX9-NEXT: v_max_f32_e32 v19, v14, v30
-; GFX9-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
-; GFX9-NEXT: v_readlane_b32 s31, v31, 1
-; GFX9-NEXT: v_readlane_b32 s30, v31, 0
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v18, v15, v16
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v16f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: v_mov_b32_e32 v32, 0x7fc00000
-; GFX940-NEXT: v_max_f32_e32 v33, v0, v16
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX940-NEXT: v_max_f32_e32 v34, v1, v17
-; GFX940-NEXT: v_max_f32_e32 v35, v2, v18
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX940-NEXT: v_max_f32_e32 v36, v3, v19
-; GFX940-NEXT: v_max_f32_e32 v37, v4, v20
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX940-NEXT: v_max_f32_e32 v38, v5, v21
-; GFX940-NEXT: v_max_f32_e32 v39, v6, v22
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX940-NEXT: v_max_f32_e32 v48, v7, v23
-; GFX940-NEXT: v_max_f32_e32 v49, v8, v24
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX940-NEXT: v_max_f32_e32 v50, v9, v25
-; GFX940-NEXT: v_max_f32_e32 v51, v10, v26
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX940-NEXT: v_max_f32_e32 v52, v11, v27
-; GFX940-NEXT: v_max_f32_e32 v53, v12, v28
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX940-NEXT: v_max_f32_e32 v54, v13, v29
-; GFX940-NEXT: v_max_f32_e32 v55, v14, v30
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_max_f32_e32 v16, v15, v31
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v15, v31
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v16f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v16
+; GFX900-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX900-NEXT: v_writelane_b32 v31, s30, 0
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v17
+; GFX900-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
+; GFX900-NEXT: v_max_f32_e32 v2, v2, v18
+; GFX900-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX900-NEXT: v_max_f32_e32 v18, v13, v29
+; GFX900-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
+; GFX900-NEXT: v_writelane_b32 v31, s31, 1
+; GFX900-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
+; GFX900-NEXT: v_max_f32_e32 v3, v3, v19
+; GFX900-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
+; GFX900-NEXT: v_max_f32_e32 v4, v4, v20
+; GFX900-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
+; GFX900-NEXT: v_max_f32_e32 v5, v5, v21
+; GFX900-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
+; GFX900-NEXT: v_max_f32_e32 v6, v6, v22
+; GFX900-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
+; GFX900-NEXT: v_max_f32_e32 v7, v7, v23
+; GFX900-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
+; GFX900-NEXT: v_max_f32_e32 v8, v8, v24
+; GFX900-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
+; GFX900-NEXT: v_max_f32_e32 v9, v9, v25
+; GFX900-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
+; GFX900-NEXT: v_max_f32_e32 v10, v10, v26
+; GFX900-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
+; GFX900-NEXT: v_max_f32_e32 v11, v11, v27
+; GFX900-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
+; GFX900-NEXT: v_max_f32_e32 v12, v12, v28
+; GFX900-NEXT: v_max_f32_e32 v19, v14, v30
+; GFX900-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
+; GFX900-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
+; GFX900-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
+; GFX900-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
+; GFX900-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
+; GFX900-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
+; GFX900-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
+; GFX900-NEXT: v_readlane_b32 s31, v31, 1
+; GFX900-NEXT: v_readlane_b32 s30, v31, 0
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v18, v15, v16
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
+; GFX900-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v16f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: scratch_load_dword v31, off, s32
+; GFX950-NEXT: v_mov_b32_e32 v32, 0x7fc00000
+; GFX950-NEXT: v_max_f32_e32 v33, v0, v16
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
+; GFX950-NEXT: v_max_f32_e32 v34, v1, v17
+; GFX950-NEXT: v_max_f32_e32 v35, v2, v18
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
+; GFX950-NEXT: v_max_f32_e32 v36, v3, v19
+; GFX950-NEXT: v_max_f32_e32 v37, v4, v20
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
+; GFX950-NEXT: v_max_f32_e32 v38, v5, v21
+; GFX950-NEXT: v_max_f32_e32 v39, v6, v22
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
+; GFX950-NEXT: v_max_f32_e32 v48, v7, v23
+; GFX950-NEXT: v_max_f32_e32 v49, v8, v24
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
+; GFX950-NEXT: v_max_f32_e32 v50, v9, v25
+; GFX950-NEXT: v_max_f32_e32 v51, v10, v26
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
+; GFX950-NEXT: v_max_f32_e32 v52, v11, v27
+; GFX950-NEXT: v_max_f32_e32 v53, v12, v28
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
+; GFX950-NEXT: v_max_f32_e32 v54, v13, v29
+; GFX950-NEXT: v_max_f32_e32 v55, v14, v30
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v16, v15, v31
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v15, v31
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v16f32:
; GFX10: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
index 80a0a194713d90..e354ec6fb3dd78 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
@@ -2,7 +2,8 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
@@ -28,26 +29,26 @@ define double @v_maximum_f64(double %src0, double %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f64:
; GFX10: ; %bb.0:
@@ -100,12 +101,6 @@ define double @v_maximum_f64__nnan(double %src0, double %src1) {
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_f64__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -152,26 +147,26 @@ define double @v_maximum_f64__nsz(double %src0, double %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f64__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f64__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f64__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f64__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f64__nsz:
; GFX10: ; %bb.0:
@@ -224,12 +219,6 @@ define double @v_maximum_f64__nnan_nsz(double %src0, double %src1) {
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_f64__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -278,28 +267,28 @@ define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f64__nnan_src0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
-; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f64__nnan_src0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
-; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f64__nnan_src0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
+; GFX900-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f64__nnan_src0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
+; GFX950-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f64__nnan_src0:
; GFX10: ; %bb.0:
@@ -362,28 +351,28 @@ define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_f64__nnan_src1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
-; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_f64__nnan_src1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
-; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_f64__nnan_src1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
+; GFX900-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_f64__nnan_src1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
+; GFX950-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_f64__nnan_src1:
; GFX10: ; %bb.0:
@@ -454,35 +443,35 @@ define void @s_maximum_f64(double inreg %src0, double inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_maximum_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s18
-; GFX9-NEXT: v_mov_b32_e32 v1, s19
-; GFX9-NEXT: v_max_f64 v[2:3], s[16:17], v[0:1]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v[0:1]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_maximum_f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
-; GFX940-NEXT: v_max_f64 v[2:3], s[0:1], v[0:1]
-; GFX940-NEXT: v_mov_b32_e32 v4, 0x7ff80000
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v[0:1]
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_maximum_f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s18
+; GFX900-NEXT: v_mov_b32_e32 v1, s19
+; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], v[0:1]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
+; GFX900-NEXT: v_mov_b32_e32 v4, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v[0:1]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_maximum_f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], v[0:1]
+; GFX950-NEXT: v_mov_b32_e32 v4, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v[0:1]
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_maximum_f64:
; GFX10: ; %bb.0:
@@ -555,35 +544,35 @@ define <2 x double> @v_maximum_v2f64(<2 x double> %src0, <2 x double> %src1) {
; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v2f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v2f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX900-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64:
; GFX10: ; %bb.0:
@@ -648,13 +637,6 @@ define <2 x double> @v_maximum_v2f64__nnan(<2 x double> %src0, <2 x double> %src
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v2f64__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v2f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -712,35 +694,35 @@ define <2 x double> @v_maximum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v2f64__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v2f64__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f64__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX900-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f64__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64__nsz:
; GFX10: ; %bb.0:
@@ -805,13 +787,6 @@ define <2 x double> @v_maximum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double>
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v2f64__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -883,46 +858,46 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_maximum_v2f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s22
-; GFX9-NEXT: v_mov_b32_e32 v4, s20
-; GFX9-NEXT: v_mov_b32_e32 v1, s23
-; GFX9-NEXT: v_mov_b32_e32 v5, s21
-; GFX9-NEXT: v_max_f64 v[2:3], s[18:19], v[0:1]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[18:19], v[0:1]
-; GFX9-NEXT: v_max_f64 v[0:1], s[16:17], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], s[16:17], v[4:5]
-; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v[0:3]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_maximum_v2f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[18:19]
-; GFX940-NEXT: v_max_f64 v[2:3], s[2:3], v[0:1]
-; GFX940-NEXT: v_mov_b32_e32 v6, 0x7ff80000
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[2:3], v[0:1]
-; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[16:17]
-; GFX940-NEXT: v_max_f64 v[4:5], s[0:1], v[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v[0:3]
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_maximum_v2f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s22
+; GFX900-NEXT: v_mov_b32_e32 v4, s20
+; GFX900-NEXT: v_mov_b32_e32 v1, s23
+; GFX900-NEXT: v_mov_b32_e32 v5, s21
+; GFX900-NEXT: v_max_f64 v[2:3], s[18:19], v[0:1]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, s[18:19], v[0:1]
+; GFX900-NEXT: v_max_f64 v[0:1], s[16:17], v[4:5]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], s[16:17], v[4:5]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_maximum_v2f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[18:19]
+; GFX950-NEXT: v_max_f64 v[2:3], s[2:3], v[0:1]
+; GFX950-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[2:3], v[0:1]
+; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[16:17]
+; GFX950-NEXT: v_max_f64 v[4:5], s[0:1], v[0:1]
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v[0:3]
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_maximum_v2f64:
; GFX10: ; %bb.0:
@@ -1012,44 +987,44 @@ define <3 x double> @v_maximum_v3f64(<3 x double> %src0, <3 x double> %src1) {
; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v3f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX9-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v3f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v12, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX940-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX900-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX900-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX950-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX950-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64:
; GFX10: ; %bb.0:
@@ -1125,14 +1100,6 @@ define <3 x double> @v_maximum_v3f64__nnan(<3 x double> %src0, <3 x double> %src
; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v3f64__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v3f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1201,44 +1168,44 @@ define <3 x double> @v_maximum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v3f64__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX9-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v3f64__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v12, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX940-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f64__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX900-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX900-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f64__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX950-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX950-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64__nsz:
; GFX10: ; %bb.0:
@@ -1314,14 +1281,6 @@ define <3 x double> @v_maximum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double>
; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v3f64__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1398,53 +1357,53 @@ define <4 x double> @v_maximum_v4f64(<4 x double> %src0, <4 x double> %src1) {
; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v4f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX9-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX9-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v4f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v16, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX940-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX940-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX900-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX900-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX900-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX900-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX950-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX950-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64:
; GFX10: ; %bb.0:
@@ -1532,15 +1491,6 @@ define <4 x double> @v_maximum_v4f64__nnan(<4 x double> %src0, <4 x double> %src
; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v4f64__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
-; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
-; GFX940-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v4f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1620,53 +1570,53 @@ define <4 x double> @v_maximum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v4f64__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX9-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX9-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v4f64__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v16, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX940-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX940-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f64__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX900-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX900-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX900-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX900-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f64__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX950-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX950-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64__nsz:
; GFX10: ; %bb.0:
@@ -1754,15 +1704,6 @@ define <4 x double> @v_maximum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double>
; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_maximum_v4f64__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
-; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
-; GFX940-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1878,89 +1819,89 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v8f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX9-NEXT: v_max_f64 v[32:33], v[2:3], v[18:19]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
-; GFX9-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
-; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
-; GFX9-NEXT: v_mov_b32_e32 v34, 0x7ff80000
-; GFX9-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
-; GFX9-NEXT: v_max_f64 v[16:17], v[8:9], v[24:25]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
-; GFX9-NEXT: v_max_f64 v[22:23], v[10:11], v[26:27]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
-; GFX9-NEXT: v_max_f64 v[24:25], v[12:13], v[28:29]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_max_f64 v[18:19], v[14:15], v[30:31]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v8f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: v_mov_b32_e32 v54, 0x7ff80000
-; GFX940-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX940-NEXT: v_max_f64 v[34:35], v[2:3], v[18:19]
-; GFX940-NEXT: v_max_f64 v[36:37], v[4:5], v[20:21]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
-; GFX940-NEXT: v_max_f64 v[38:39], v[6:7], v[22:23]
-; GFX940-NEXT: v_max_f64 v[48:49], v[8:9], v[24:25]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
-; GFX940-NEXT: v_max_f64 v[50:51], v[10:11], v[26:27]
-; GFX940-NEXT: v_max_f64 v[52:53], v[12:13], v[28:29]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_max_f64 v[16:17], v[14:15], v[30:31]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v8f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX900-NEXT: v_max_f64 v[32:33], v[2:3], v[18:19]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX900-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
+; GFX900-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v34, 0x7ff80000
+; GFX900-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
+; GFX900-NEXT: v_max_f64 v[16:17], v[8:9], v[24:25]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX900-NEXT: v_max_f64 v[22:23], v[10:11], v[26:27]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX900-NEXT: v_max_f64 v[24:25], v[12:13], v[28:29]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
+; GFX900-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
+; GFX900-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
+; GFX900-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
+; GFX900-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
+; GFX900-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_max_f64 v[18:19], v[14:15], v[30:31]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v8f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: scratch_load_dword v31, off, s32
+; GFX950-NEXT: v_mov_b32_e32 v54, 0x7ff80000
+; GFX950-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX950-NEXT: v_max_f64 v[34:35], v[2:3], v[18:19]
+; GFX950-NEXT: v_max_f64 v[36:37], v[4:5], v[20:21]
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX950-NEXT: v_max_f64 v[38:39], v[6:7], v[22:23]
+; GFX950-NEXT: v_max_f64 v[48:49], v[8:9], v[24:25]
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
+; GFX950-NEXT: v_max_f64 v[50:51], v[10:11], v[26:27]
+; GFX950-NEXT: v_max_f64 v[52:53], v[12:13], v[28:29]
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_max_f64 v[16:17], v[14:15], v[30:31]
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v8f64:
; GFX10: ; %bb.0:
@@ -2332,295 +2273,295 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v16f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX9-NEXT: v_writelane_b32 v34, s30, 0
-; GFX9-NEXT: v_writelane_b32 v34, s31, 1
-; GFX9-NEXT: v_writelane_b32 v34, s34, 2
-; GFX9-NEXT: v_writelane_b32 v34, s35, 3
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
-; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32]
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
-; GFX9-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
-; GFX9-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
-; GFX9-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
-; GFX9-NEXT: v_max_f64 v[14:15], v[14:15], v[31:32]
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
-; GFX9-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
-; GFX9-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX9-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
-; GFX9-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
-; GFX9-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
-; GFX9-NEXT: v_max_f64 v[22:23], v[22:23], v[31:32]
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
-; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
-; GFX9-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
-; GFX9-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
-; GFX9-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
-; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
-; GFX9-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
-; GFX9-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
-; GFX9-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
-; GFX9-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
-; GFX9-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
-; GFX9-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
-; GFX9-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX9-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
-; GFX9-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
-; GFX9-NEXT: v_readlane_b32 s35, v34, 3
-; GFX9-NEXT: v_readlane_b32 s34, v34, 2
-; GFX9-NEXT: v_readlane_b32 s31, v34, 1
-; GFX9-NEXT: v_readlane_b32 s30, v34, 0
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_maximum_v16f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_accvgpr_write_b32 a1, v40 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a2, v41 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a3, v42 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a4, v43 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a5, v44 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a6, v45 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a7, v46 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a8, v47 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a9, v56 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a10, v57 ; Reload Reuse
-; GFX940-NEXT: scratch_load_dword v37, off, s32 offset:16
-; GFX940-NEXT: scratch_load_dword v36, off, s32 offset:12
-; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:24
-; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:20
-; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:32
-; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:28
-; GFX940-NEXT: scratch_load_dword v57, off, s32 offset:8
-; GFX940-NEXT: scratch_load_dword v56, off, s32 offset:4
-; GFX940-NEXT: scratch_load_dword v47, off, s32 offset:40
-; GFX940-NEXT: scratch_load_dword v46, off, s32 offset:36
-; GFX940-NEXT: scratch_load_dword v45, off, s32 offset:48
-; GFX940-NEXT: scratch_load_dword v44, off, s32 offset:44
-; GFX940-NEXT: scratch_load_dword v43, off, s32 offset:56
-; GFX940-NEXT: scratch_load_dword v42, off, s32 offset:52
-; GFX940-NEXT: scratch_load_dword v41, off, s32 offset:64
-; GFX940-NEXT: scratch_load_dword v40, off, s32 offset:60
-; GFX940-NEXT: scratch_load_dword v55, off, s32 offset:72
-; GFX940-NEXT: scratch_load_dword v54, off, s32 offset:68
-; GFX940-NEXT: scratch_load_dword v53, off, s32 offset:80
-; GFX940-NEXT: scratch_load_dword v52, off, s32 offset:76
-; GFX940-NEXT: scratch_load_dword v51, off, s32 offset:88
-; GFX940-NEXT: scratch_load_dword v50, off, s32 offset:84
-; GFX940-NEXT: scratch_load_dword v35, off, s32 offset:96
-; GFX940-NEXT: scratch_load_dword v34, off, s32 offset:92
-; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: scratch_load_dword v33, off, s32 offset:104
-; GFX940-NEXT: scratch_load_dword v32, off, s32 offset:100
-; GFX940-NEXT: v_accvgpr_write_b32 a11, v58 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a12, v59 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a13, v60 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a14, v61 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a15, v62 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a16, v63 ; Reload Reuse
-; GFX940-NEXT: s_waitcnt vmcnt(25)
-; GFX940-NEXT: v_max_f64 v[58:59], v[2:3], v[36:37]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[36:37]
-; GFX940-NEXT: scratch_load_dword v37, off, s32 offset:112
-; GFX940-NEXT: scratch_load_dword v36, off, s32 offset:108
-; GFX940-NEXT: s_waitcnt vmcnt(25)
-; GFX940-NEXT: v_max_f64 v[60:61], v[4:5], v[38:39]
-; GFX940-NEXT: v_cmp_u_f64_e64 s[0:1], v[4:5], v[38:39]
-; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:120
-; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:116
-; GFX940-NEXT: s_waitcnt vmcnt(25)
-; GFX940-NEXT: v_max_f64 v[62:63], v[6:7], v[48:49]
-; GFX940-NEXT: v_cmp_u_f64_e64 s[2:3], v[6:7], v[48:49]
-; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:128
-; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:124
-; GFX940-NEXT: s_waitcnt vmcnt(25)
-; GFX940-NEXT: v_max_f64 v[2:3], v[0:1], v[56:57]
-; GFX940-NEXT: v_cmp_u_f64_e64 s[4:5], v[0:1], v[56:57]
-; GFX940-NEXT: v_mov_b32_e32 v0, 0x7ff80000
-; GFX940-NEXT: s_waitcnt vmcnt(23)
-; GFX940-NEXT: v_max_f64 v[56:57], v[8:9], v[46:47]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v2, 0, s[4:5]
-; GFX940-NEXT: v_accvgpr_write_b32 a0, v1
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v3, v0, s[4:5]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v58, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v59, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
-; GFX940-NEXT: s_waitcnt vmcnt(21)
-; GFX940-NEXT: v_max_f64 v[46:47], v[10:11], v[44:45]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v60, 0, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v57, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
-; GFX940-NEXT: s_waitcnt vmcnt(19)
-; GFX940-NEXT: v_max_f64 v[44:45], v[12:13], v[42:43]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v61, v0, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v46, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v47, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
-; GFX940-NEXT: s_waitcnt vmcnt(17)
-; GFX940-NEXT: v_max_f64 v[42:43], v[14:15], v[40:41]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v62, 0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v44, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v45, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
-; GFX940-NEXT: s_waitcnt vmcnt(15)
-; GFX940-NEXT: v_max_f64 v[40:41], v[16:17], v[54:55]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v63, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v42, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v43, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
-; GFX940-NEXT: s_waitcnt vmcnt(13)
-; GFX940-NEXT: v_max_f64 v[54:55], v[18:19], v[52:53]
-; GFX940-NEXT: v_accvgpr_read_b32 v63, a16 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v16, v40, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v17, v41, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
-; GFX940-NEXT: s_waitcnt vmcnt(11)
-; GFX940-NEXT: v_max_f64 v[52:53], v[20:21], v[50:51]
-; GFX940-NEXT: v_accvgpr_read_b32 v62, a15 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v18, v54, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v19, v55, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
-; GFX940-NEXT: s_waitcnt vmcnt(9)
-; GFX940-NEXT: v_max_f64 v[50:51], v[22:23], v[34:35]
-; GFX940-NEXT: v_accvgpr_read_b32 v61, a14 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v20, v52, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v21, v53, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[34:35]
-; GFX940-NEXT: s_waitcnt vmcnt(6)
-; GFX940-NEXT: v_max_f64 v[34:35], v[24:25], v[32:33]
-; GFX940-NEXT: v_accvgpr_read_b32 v60, a13 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v22, v50, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v23, v51, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[32:33]
-; GFX940-NEXT: v_accvgpr_read_b32 v59, a12 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v58, a11 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v24, v34, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v25, v35, v0, vcc
-; GFX940-NEXT: v_accvgpr_read_b32 v57, a10 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v56, a9 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v47, a8 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v46, a7 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v45, a6 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v44, a5 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v43, a4 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v42, a3 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v41, a2 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v40, a1 ; Reload Reuse
-; GFX940-NEXT: s_waitcnt vmcnt(4)
-; GFX940-NEXT: v_max_f64 v[32:33], v[26:27], v[36:37]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[36:37]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v26, v32, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v27, v33, v0, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(2)
-; GFX940-NEXT: v_max_f64 v[32:33], v[28:29], v[38:39]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[38:39]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v28, v32, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v29, v33, v0, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_max_f64 v[32:33], v[30:31], v[48:49]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[48:49]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v30, v32, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v31, v33, v0, vcc
-; GFX940-NEXT: v_accvgpr_read_b32 v0, a0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v16f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
+; GFX900-NEXT: v_writelane_b32 v34, s30, 0
+; GFX900-NEXT: v_writelane_b32 v34, s31, 1
+; GFX900-NEXT: v_writelane_b32 v34, s34, 2
+; GFX900-NEXT: v_writelane_b32 v34, s35, 3
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32]
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX900-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX900-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX900-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
+; GFX900-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX900-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX900-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX900-NEXT: v_max_f64 v[14:15], v[14:15], v[31:32]
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
+; GFX900-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX900-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX900-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX900-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
+; GFX900-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX900-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX900-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX900-NEXT: v_max_f64 v[22:23], v[22:23], v[31:32]
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
+; GFX900-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX900-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX900-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX900-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX900-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX900-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX900-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
+; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX900-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
+; GFX900-NEXT: v_mov_b32_e32 v32, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
+; GFX900-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
+; GFX900-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
+; GFX900-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
+; GFX900-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
+; GFX900-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
+; GFX900-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
+; GFX900-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
+; GFX900-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
+; GFX900-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
+; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
+; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
+; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
+; GFX900-NEXT: v_readlane_b32 s35, v34, 3
+; GFX900-NEXT: v_readlane_b32 s34, v34, 2
+; GFX900-NEXT: v_readlane_b32 s31, v34, 1
+; GFX900-NEXT: v_readlane_b32 s30, v34, 0
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v16f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_accvgpr_write_b32 a1, v40 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a2, v41 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a3, v42 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a4, v43 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a5, v44 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a6, v45 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a7, v46 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a8, v47 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a9, v56 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a10, v57 ; Reload Reuse
+; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:16
+; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:12
+; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:24
+; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:20
+; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:32
+; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:28
+; GFX950-NEXT: scratch_load_dword v57, off, s32 offset:8
+; GFX950-NEXT: scratch_load_dword v56, off, s32 offset:4
+; GFX950-NEXT: scratch_load_dword v47, off, s32 offset:40
+; GFX950-NEXT: scratch_load_dword v46, off, s32 offset:36
+; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:48
+; GFX950-NEXT: scratch_load_dword v44, off, s32 offset:44
+; GFX950-NEXT: scratch_load_dword v43, off, s32 offset:56
+; GFX950-NEXT: scratch_load_dword v42, off, s32 offset:52
+; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:64
+; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:60
+; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:72
+; GFX950-NEXT: scratch_load_dword v54, off, s32 offset:68
+; GFX950-NEXT: scratch_load_dword v53, off, s32 offset:80
+; GFX950-NEXT: scratch_load_dword v52, off, s32 offset:76
+; GFX950-NEXT: scratch_load_dword v51, off, s32 offset:88
+; GFX950-NEXT: scratch_load_dword v50, off, s32 offset:84
+; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:96
+; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:92
+; GFX950-NEXT: scratch_load_dword v31, off, s32
+; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:104
+; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:100
+; GFX950-NEXT: v_accvgpr_write_b32 a11, v58 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a12, v59 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a13, v60 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a14, v61 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a15, v62 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a16, v63 ; Reload Reuse
+; GFX950-NEXT: s_waitcnt vmcnt(25)
+; GFX950-NEXT: v_max_f64 v[58:59], v[2:3], v[36:37]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[36:37]
+; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:112
+; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:108
+; GFX950-NEXT: s_waitcnt vmcnt(25)
+; GFX950-NEXT: v_max_f64 v[60:61], v[4:5], v[38:39]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[0:1], v[4:5], v[38:39]
+; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:120
+; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:116
+; GFX950-NEXT: s_waitcnt vmcnt(25)
+; GFX950-NEXT: v_max_f64 v[62:63], v[6:7], v[48:49]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[2:3], v[6:7], v[48:49]
+; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:128
+; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:124
+; GFX950-NEXT: s_waitcnt vmcnt(25)
+; GFX950-NEXT: v_max_f64 v[2:3], v[0:1], v[56:57]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[4:5], v[0:1], v[56:57]
+; GFX950-NEXT: v_mov_b32_e32 v0, 0x7ff80000
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_max_f64 v[56:57], v[8:9], v[46:47]
+; GFX950-NEXT: v_cndmask_b32_e64 v1, v2, 0, s[4:5]
+; GFX950-NEXT: v_accvgpr_write_b32 a0, v1
+; GFX950-NEXT: v_cndmask_b32_e64 v1, v3, v0, s[4:5]
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v58, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v59, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
+; GFX950-NEXT: s_waitcnt vmcnt(21)
+; GFX950-NEXT: v_max_f64 v[46:47], v[10:11], v[44:45]
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v60, 0, s[0:1]
+; GFX950-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v57, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
+; GFX950-NEXT: s_waitcnt vmcnt(19)
+; GFX950-NEXT: v_max_f64 v[44:45], v[12:13], v[42:43]
+; GFX950-NEXT: v_cndmask_b32_e64 v5, v61, v0, s[0:1]
+; GFX950-NEXT: v_cndmask_b32_e64 v10, v46, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v47, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
+; GFX950-NEXT: s_waitcnt vmcnt(17)
+; GFX950-NEXT: v_max_f64 v[42:43], v[14:15], v[40:41]
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v62, 0, s[2:3]
+; GFX950-NEXT: v_cndmask_b32_e64 v12, v44, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v45, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
+; GFX950-NEXT: s_waitcnt vmcnt(15)
+; GFX950-NEXT: v_max_f64 v[40:41], v[16:17], v[54:55]
+; GFX950-NEXT: v_cndmask_b32_e64 v7, v63, v0, s[2:3]
+; GFX950-NEXT: v_cndmask_b32_e64 v14, v42, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v43, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
+; GFX950-NEXT: s_waitcnt vmcnt(13)
+; GFX950-NEXT: v_max_f64 v[54:55], v[18:19], v[52:53]
+; GFX950-NEXT: v_accvgpr_read_b32 v63, a16 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v16, v40, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v17, v41, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
+; GFX950-NEXT: s_waitcnt vmcnt(11)
+; GFX950-NEXT: v_max_f64 v[52:53], v[20:21], v[50:51]
+; GFX950-NEXT: v_accvgpr_read_b32 v62, a15 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v18, v54, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v19, v55, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
+; GFX950-NEXT: s_waitcnt vmcnt(9)
+; GFX950-NEXT: v_max_f64 v[50:51], v[22:23], v[34:35]
+; GFX950-NEXT: v_accvgpr_read_b32 v61, a14 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v20, v52, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v21, v53, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[34:35]
+; GFX950-NEXT: s_waitcnt vmcnt(6)
+; GFX950-NEXT: v_max_f64 v[34:35], v[24:25], v[32:33]
+; GFX950-NEXT: v_accvgpr_read_b32 v60, a13 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v22, v50, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v23, v51, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[32:33]
+; GFX950-NEXT: v_accvgpr_read_b32 v59, a12 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v58, a11 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v24, v34, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v25, v35, v0, vcc
+; GFX950-NEXT: v_accvgpr_read_b32 v57, a10 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v56, a9 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v47, a8 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v46, a7 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v45, a6 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v44, a5 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v43, a4 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v42, a3 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v41, a2 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v40, a1 ; Reload Reuse
+; GFX950-NEXT: s_waitcnt vmcnt(4)
+; GFX950-NEXT: v_max_f64 v[32:33], v[26:27], v[36:37]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[36:37]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v26, v32, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v27, v33, v0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(2)
+; GFX950-NEXT: v_max_f64 v[32:33], v[28:29], v[38:39]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[38:39]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v28, v32, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v29, v33, v0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_max_f64 v[32:33], v[30:31], v[48:49]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[48:49]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v30, v32, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v31, v33, v0, vcc
+; GFX950-NEXT: v_accvgpr_read_b32 v0, a0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v16f64:
; GFX10: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
index a74043378a2598..329a85f91c2514 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
@@ -2,7 +2,8 @@
; xUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
@@ -17,24 +18,24 @@ define half @v_minimum_f16(half %src0, half %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f16_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f16_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f16_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f16_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f16:
; GFX10: ; %bb.0:
@@ -79,12 +80,6 @@ define half @v_minimum_f16__nnan(half %src0, half %src1) {
; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_f16__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -120,24 +115,24 @@ define half @v_minimum_f16__nsz(half %src0, half %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f16__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f16_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f16__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f16_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f16__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f16_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f16__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f16_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f16__nsz:
; GFX10: ; %bb.0:
@@ -182,12 +177,6 @@ define half @v_minimum_f16__nnan_nsz(half %src0, half %src1) {
; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_f16__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_f16__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -224,26 +213,26 @@ define half @v_minimum_f16__nnan_src0(half %arg0, half %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f16__nnan_src0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f16_e32 v0, 1.0, v0
-; GFX9-NEXT: v_min_f16_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f16__nnan_src0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f16_e32 v0, 1.0, v0
-; GFX940-NEXT: v_min_f16_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f16__nnan_src0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f16_e32 v0, 1.0, v0
+; GFX900-NEXT: v_min_f16_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f16__nnan_src0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f16_e32 v0, 1.0, v0
+; GFX950-NEXT: v_min_f16_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f16__nnan_src0:
; GFX10: ; %bb.0:
@@ -291,26 +280,26 @@ define half @v_minimum_f16__nnan_src1(half %src0, half %arg1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f16__nnan_src1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f16_e32 v1, 1.0, v1
-; GFX9-NEXT: v_min_f16_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f16__nnan_src1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f16_e32 v1, 1.0, v1
-; GFX940-NEXT: v_min_f16_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f16__nnan_src1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f16_e32 v1, 1.0, v1
+; GFX900-NEXT: v_min_f16_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f16__nnan_src1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f16_e32 v1, 1.0, v1
+; GFX950-NEXT: v_min_f16_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f16__nnan_src1:
; GFX10: ; %bb.0:
@@ -362,34 +351,34 @@ define void @s_minimum_f16(half inreg %src0, half inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_minimum_f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s17
-; GFX9-NEXT: v_min_f16_e32 v1, s16, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v0
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_minimum_f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s1
-; GFX940-NEXT: v_min_f16_e32 v1, s0, v0
-; GFX940-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v0
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_minimum_f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s17
+; GFX900-NEXT: v_min_f16_e32 v1, s16, v0
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v0
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_minimum_f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v0, s1
+; GFX950-NEXT: v_min_f16_e32 v1, s0, v0
+; GFX950-NEXT: v_mov_b32_e32 v2, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX950-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v0
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_minimum_f16:
; GFX10: ; %bb.0:
@@ -456,35 +445,35 @@ define <2 x half> @v_minimum_v2f16(<2 x half> %src0, <2 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v2f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v2f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f16:
; GFX10: ; %bb.0:
@@ -542,12 +531,6 @@ define <2 x half> @v_minimum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v2f16__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v2f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -590,35 +573,35 @@ define <2 x half> @v_minimum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v2f16__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v2f16__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f16__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f16__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f16__nsz:
; GFX10: ; %bb.0:
@@ -676,12 +659,6 @@ define <2 x half> @v_minimum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1)
; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v2f16__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v2f16__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -729,50 +706,50 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_minimum_v2f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s17
-; GFX9-NEXT: v_mov_b32_e32 v1, s17
-; GFX9-NEXT: s_lshr_b32 s4, s17, 16
-; GFX9-NEXT: v_pk_min_f16 v1, s16, v1
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
-; GFX9-NEXT: s_lshr_b32 s5, s16, 16
-; GFX9-NEXT: v_mov_b32_e32 v3, s4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s5, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v0
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_minimum_v2f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s1
-; GFX940-NEXT: v_mov_b32_e32 v1, s1
-; GFX940-NEXT: s_lshr_b32 s1, s1, 16
-; GFX940-NEXT: v_pk_min_f16 v1, s0, v1
-; GFX940-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
-; GFX940-NEXT: s_lshr_b32 s0, s0, 16
-; GFX940-NEXT: v_mov_b32_e32 v3, s1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, s0, v3
-; GFX940-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v0
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_minimum_v2f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s17
+; GFX900-NEXT: v_mov_b32_e32 v1, s17
+; GFX900-NEXT: s_lshr_b32 s4, s17, 16
+; GFX900-NEXT: v_pk_min_f16 v1, s16, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
+; GFX900-NEXT: s_lshr_b32 s5, s16, 16
+; GFX900-NEXT: v_mov_b32_e32 v3, s4
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s5, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v0
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_minimum_v2f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v0, s1
+; GFX950-NEXT: v_mov_b32_e32 v1, s1
+; GFX950-NEXT: s_lshr_b32 s1, s1, 16
+; GFX950-NEXT: v_pk_min_f16 v1, s0, v1
+; GFX950-NEXT: v_mov_b32_e32 v2, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
+; GFX950-NEXT: s_lshr_b32 s0, s0, 16
+; GFX950-NEXT: v_mov_b32_e32 v3, s1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, s0, v3
+; GFX950-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX950-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v0
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_minimum_v2f16:
; GFX10: ; %bb.0:
@@ -850,41 +827,41 @@ define <3 x half> @v_minimum_v3f16(<3 x half> %src0, <3 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v3f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v4, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_pk_min_f16 v3, v0, v2
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v3f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v4, v1, v3
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT: v_pk_min_f16 v3, v0, v2
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v4, v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX900-NEXT: v_pk_min_f16 v3, v0, v2
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v4, v1, v3
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX950-NEXT: v_pk_min_f16 v3, v0, v2
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f16:
; GFX10: ; %bb.0:
@@ -952,13 +929,6 @@ define <3 x half> @v_minimum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v3f16__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX940-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v3f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1007,41 +977,41 @@ define <3 x half> @v_minimum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v3f16__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v4, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_pk_min_f16 v3, v0, v2
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v3f16__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v4, v1, v3
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT: v_pk_min_f16 v3, v0, v2
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f16__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v4, v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX900-NEXT: v_pk_min_f16 v3, v0, v2
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f16__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v4, v1, v3
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX950-NEXT: v_pk_min_f16 v3, v0, v2
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f16__nsz:
; GFX10: ; %bb.0:
@@ -1109,13 +1079,6 @@ define <3 x half> @v_minimum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1)
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v3f16__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX940-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v3f16__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1171,51 +1134,51 @@ define <4 x half> @v_minimum_v4f16(<4 x half> %src0, <4 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v4f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v4, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_pk_min_f16 v3, v0, v2
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v4f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v4, v1, v3
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v3, v0, v2
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v4, v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX900-NEXT: v_pk_min_f16 v3, v0, v2
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v4, v1, v3
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v3, v0, v2
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX950-NEXT: v_perm_b32 v1, v1, v6, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f16:
; GFX10: ; %bb.0:
@@ -1294,13 +1257,6 @@ define <4 x half> @v_minimum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) {
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v4f16__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX940-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v4f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1356,51 +1312,51 @@ define <4 x half> @v_minimum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v4f16__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v4, v1, v3
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_pk_min_f16 v3, v0, v2
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v6, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v4f16__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v4, v1, v3
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v3, v0, v2
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f16__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v4, v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX900-NEXT: v_pk_min_f16 v3, v0, v2
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX900-NEXT: v_perm_b32 v1, v1, v6, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f16__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v4, v1, v3
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v3, v0, v2
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
+; GFX950-NEXT: v_perm_b32 v1, v1, v6, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f16__nsz:
; GFX10: ; %bb.0:
@@ -1479,13 +1435,6 @@ define <4 x half> @v_minimum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1)
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v4f16__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX940-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v4f16__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1561,83 +1510,83 @@ define <8 x half> @v_minimum_v8f16(<8 x half> %src0, <8 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v8f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v8, v3, v7
-; GFX9-NEXT: v_mov_b32_e32 v9, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v8
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
-; GFX9-NEXT: v_pk_min_f16 v7, v2, v6
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v7
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
-; GFX9-NEXT: v_pk_min_f16 v6, v1, v5
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v6
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
-; GFX9-NEXT: v_pk_min_f16 v5, v0, v4
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v5
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v6, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v7, s4
-; GFX9-NEXT: v_perm_b32 v2, v2, v8, s4
-; GFX9-NEXT: v_perm_b32 v3, v3, v10, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v8f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v8, v3, v7
-; GFX940-NEXT: v_mov_b32_e32 v9, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v8, 16, v8
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v7, v2, v6
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
-; GFX940-NEXT: v_perm_b32 v3, v3, v10, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v7, 16, v7
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v6, v1, v5
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
-; GFX940-NEXT: v_perm_b32 v2, v2, v8, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v5, v0, v4
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
-; GFX940-NEXT: v_perm_b32 v1, v1, v7, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v5, 16, v5
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v6, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v8f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v8, v3, v7
+; GFX900-NEXT: v_mov_b32_e32 v9, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
+; GFX900-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v8, 16, v8
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
+; GFX900-NEXT: v_pk_min_f16 v7, v2, v6
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
+; GFX900-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v7, 16, v7
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
+; GFX900-NEXT: v_pk_min_f16 v6, v1, v5
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v6, 16, v6
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
+; GFX900-NEXT: v_pk_min_f16 v5, v0, v4
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v5, 16, v5
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v6, s4
+; GFX900-NEXT: v_perm_b32 v1, v1, v7, s4
+; GFX900-NEXT: v_perm_b32 v2, v2, v8, s4
+; GFX900-NEXT: v_perm_b32 v3, v3, v10, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v8f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v8, v3, v7
+; GFX950-NEXT: v_mov_b32_e32 v9, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v3, v7
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v8, 16, v8
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v7, v2, v6
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
+; GFX950-NEXT: v_perm_b32 v3, v3, v10, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v7, 16, v7
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v6, v1, v5
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v5
+; GFX950-NEXT: v_perm_b32 v2, v2, v8, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v6, 16, v6
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v5, v0, v4
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v4
+; GFX950-NEXT: v_perm_b32 v1, v1, v7, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v5, 16, v5
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v6, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v8f16:
; GFX10: ; %bb.0:
@@ -1818,147 +1767,147 @@ define <16 x half> @v_minimum_v16f16(<16 x half> %src0, <16 x half> %src1) {
; GFX8-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v16f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v16, v7, v15
-; GFX9-NEXT: v_mov_b32_e32 v17, 0x7e00
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v16
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
-; GFX9-NEXT: v_pk_min_f16 v15, v6, v14
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v15, 16, v15
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
-; GFX9-NEXT: v_pk_min_f16 v14, v5, v13
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v14
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
-; GFX9-NEXT: v_pk_min_f16 v13, v4, v12
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v13, 16, v13
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
-; GFX9-NEXT: v_pk_min_f16 v12, v3, v11
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v12, 16, v12
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
-; GFX9-NEXT: v_pk_min_f16 v11, v2, v10
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v11, 16, v11
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
-; GFX9-NEXT: v_pk_min_f16 v10, v1, v9
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v10
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
-; GFX9-NEXT: v_pk_min_f16 v9, v0, v8
-; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v9, 16, v9
-; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v0, v10, s4
-; GFX9-NEXT: v_perm_b32 v1, v1, v11, s4
-; GFX9-NEXT: v_perm_b32 v2, v2, v12, s4
-; GFX9-NEXT: v_perm_b32 v3, v3, v13, s4
-; GFX9-NEXT: v_perm_b32 v4, v4, v14, s4
-; GFX9-NEXT: v_perm_b32 v5, v5, v15, s4
-; GFX9-NEXT: v_perm_b32 v6, v6, v16, s4
-; GFX9-NEXT: v_perm_b32 v7, v7, v18, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v16f16:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_pk_min_f16 v16, v7, v15
-; GFX940-NEXT: v_mov_b32_e32 v17, 0x7e00
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
-; GFX940-NEXT: s_mov_b32 s0, 0x5040100
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v16, 16, v16
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v15, v6, v14
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
-; GFX940-NEXT: v_perm_b32 v7, v7, v18, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v15, 16, v15
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v14, v5, v13
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
-; GFX940-NEXT: v_perm_b32 v6, v6, v16, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v14, 16, v14
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v13, v4, v12
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
-; GFX940-NEXT: v_perm_b32 v5, v5, v15, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v13, 16, v13
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v12, v3, v11
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
-; GFX940-NEXT: v_perm_b32 v4, v4, v14, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v12, 16, v12
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v11, v2, v10
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
-; GFX940-NEXT: v_perm_b32 v3, v3, v13, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v11, 16, v11
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v10, v1, v9
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
-; GFX940-NEXT: v_perm_b32 v2, v2, v12, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v10, 16, v10
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: v_pk_min_f16 v9, v0, v8
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
-; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
-; GFX940-NEXT: v_perm_b32 v1, v1, v11, s0
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
-; GFX940-NEXT: v_lshrrev_b32_e32 v9, 16, v9
-; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
-; GFX940-NEXT: v_perm_b32 v0, v0, v10, s0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v16f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v16, v7, v15
+; GFX900-NEXT: v_mov_b32_e32 v17, 0x7e00
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
+; GFX900-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v16, 16, v16
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
+; GFX900-NEXT: v_pk_min_f16 v15, v6, v14
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
+; GFX900-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v15, 16, v15
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
+; GFX900-NEXT: v_pk_min_f16 v14, v5, v13
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
+; GFX900-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v14, 16, v14
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
+; GFX900-NEXT: v_pk_min_f16 v13, v4, v12
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
+; GFX900-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v13, 16, v13
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
+; GFX900-NEXT: v_pk_min_f16 v12, v3, v11
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
+; GFX900-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v12, 16, v12
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
+; GFX900-NEXT: v_pk_min_f16 v11, v2, v10
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
+; GFX900-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v11, 16, v11
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
+; GFX900-NEXT: v_pk_min_f16 v10, v1, v9
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
+; GFX900-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v10, 16, v10
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
+; GFX900-NEXT: v_pk_min_f16 v9, v0, v8
+; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
+; GFX900-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v9, 16, v9
+; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
+; GFX900-NEXT: s_mov_b32 s4, 0x5040100
+; GFX900-NEXT: v_perm_b32 v0, v0, v10, s4
+; GFX900-NEXT: v_perm_b32 v1, v1, v11, s4
+; GFX900-NEXT: v_perm_b32 v2, v2, v12, s4
+; GFX900-NEXT: v_perm_b32 v3, v3, v13, s4
+; GFX900-NEXT: v_perm_b32 v4, v4, v14, s4
+; GFX900-NEXT: v_perm_b32 v5, v5, v15, s4
+; GFX900-NEXT: v_perm_b32 v6, v6, v16, s4
+; GFX900-NEXT: v_perm_b32 v7, v7, v18, s4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v16f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v16, v7, v15
+; GFX950-NEXT: v_mov_b32_e32 v17, 0x7e00
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v7, v15
+; GFX950-NEXT: s_mov_b32 s0, 0x5040100
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v16, 16, v16
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v15, v6, v14
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v6, v14
+; GFX950-NEXT: v_perm_b32 v7, v7, v18, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v15, 16, v15
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v14, v5, v13
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v5, v13
+; GFX950-NEXT: v_perm_b32 v6, v6, v16, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v14, 16, v14
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v13, v4, v12
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v4, v12
+; GFX950-NEXT: v_perm_b32 v5, v5, v15, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v13, 16, v13
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v12, v3, v11
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v3, v11
+; GFX950-NEXT: v_perm_b32 v4, v4, v14, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v12, 16, v12
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v11, v2, v10
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v2, v10
+; GFX950-NEXT: v_perm_b32 v3, v3, v13, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v11, 16, v11
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v10, v1, v9
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v1, v9
+; GFX950-NEXT: v_perm_b32 v2, v2, v12, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v10, 16, v10
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: v_pk_min_f16 v9, v0, v8
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc
+; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v8
+; GFX950-NEXT: v_perm_b32 v1, v1, v11, s0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc
+; GFX950-NEXT: v_lshrrev_b32_e32 v9, 16, v9
+; GFX950-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc
+; GFX950-NEXT: v_perm_b32 v0, v0, v10, s0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v16f16:
; GFX10: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
index 2b3041290b5866..2614fb3bf9f737 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
@@ -2,7 +2,8 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
@@ -26,24 +27,24 @@ define float @v_minimum_f32(float %src0, float %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f32:
; GFX10: ; %bb.0:
@@ -94,12 +95,6 @@ define float @v_minimum_f32__nnan(float %src0, float %src1) {
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_f32__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -144,24 +139,24 @@ define float @v_minimum_f32__nsz(float %src0, float %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f32__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f32__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f32__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f32__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f32__nsz:
; GFX10: ; %bb.0:
@@ -212,12 +207,6 @@ define float @v_minimum_f32__nnan_nsz(float %src0, float %src1) {
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_f32__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -264,26 +253,26 @@ define float @v_minimum_f32__nnan_src0(float %arg0, float %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f32__nnan_src0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f32_e32 v0, 1.0, v0
-; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f32__nnan_src0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f32_e32 v0, 1.0, v0
-; GFX940-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f32__nnan_src0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX900-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f32__nnan_src0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX950-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f32__nnan_src0:
; GFX10: ; %bb.0:
@@ -341,26 +330,26 @@ define float @v_minimum_f32__nnan_src1(float %src0, float %arg1) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f32__nnan_src1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1
-; GFX9-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f32__nnan_src1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f32_e32 v1, 1.0, v1
-; GFX940-NEXT: v_min_f32_e32 v2, v0, v1
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f32__nnan_src1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f32_e32 v1, 1.0, v1
+; GFX900-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f32__nnan_src1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f32_e32 v1, 1.0, v1
+; GFX950-NEXT: v_min_f32_e32 v2, v0, v1
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f32__nnan_src1:
; GFX10: ; %bb.0:
@@ -424,32 +413,32 @@ define void @s_minimum_f32(float inreg %src0, float inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_minimum_f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s17
-; GFX9-NEXT: v_min_f32_e32 v1, s16, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v0
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_minimum_f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s1
-; GFX940-NEXT: v_min_f32_e32 v1, s0, v0
-; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v0
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_minimum_f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s17
+; GFX900-NEXT: v_min_f32_e32 v1, s16, v0
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v0
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_minimum_f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v0, s1
+; GFX950-NEXT: v_min_f32_e32 v1, s0, v0
+; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v0
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_minimum_f32:
; GFX10: ; %bb.0:
@@ -517,31 +506,31 @@ define <2 x float> @v_minimum_v2f32(<2 x float> %src0, <2 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v2f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v4, v0, v2
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_min_f32_e32 v2, v1, v3
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v2f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v4, v0, v2
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT: v_min_f32_e32 v2, v1, v3
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v4, v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX900-NEXT: v_min_f32_e32 v2, v1, v3
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v4, v0, v2
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX950-NEXT: v_min_f32_e32 v2, v1, v3
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f32:
; GFX10: ; %bb.0:
@@ -601,13 +590,6 @@ define <2 x float> @v_minimum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1)
; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v2f32__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX940-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v2f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -660,31 +642,31 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v2f32__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v4, v0, v2
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_min_f32_e32 v2, v1, v3
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v2f32__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v4, v0, v2
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT: v_min_f32_e32 v2, v1, v3
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f32__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v4, v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX900-NEXT: v_min_f32_e32 v2, v1, v3
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f32__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v4, v0, v2
+; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX950-NEXT: v_min_f32_e32 v2, v1, v3
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f32__nsz:
; GFX10: ; %bb.0:
@@ -744,13 +726,6 @@ define <2 x float> @v_minimum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr
; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v2f32__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX940-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v2f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -813,40 +788,40 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_minimum_v2f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s19
-; GFX9-NEXT: v_min_f32_e32 v1, s17, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s17, v0
-; GFX9-NEXT: v_mov_b32_e32 v0, s18
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, s16, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v[0:1]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_minimum_v2f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s3
-; GFX940-NEXT: v_min_f32_e32 v1, s1, v0
-; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s1, v0
-; GFX940-NEXT: v_mov_b32_e32 v0, s2
-; GFX940-NEXT: v_min_f32_e32 v3, s0, v0
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v[0:1]
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_minimum_v2f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s19
+; GFX900-NEXT: v_min_f32_e32 v1, s17, v0
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, s17, v0
+; GFX900-NEXT: v_mov_b32_e32 v0, s18
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX900-NEXT: v_min_f32_e32 v3, s16, v0
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, s16, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v[0:1]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_minimum_v2f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v0, s3
+; GFX950-NEXT: v_min_f32_e32 v1, s1, v0
+; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s1, v0
+; GFX950-NEXT: v_mov_b32_e32 v0, s2
+; GFX950-NEXT: v_min_f32_e32 v3, s0, v0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v[0:1]
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_minimum_v2f32:
; GFX10: ; %bb.0:
@@ -927,38 +902,38 @@ define <3 x float> @v_minimum_v3f32(<3 x float> %src0, <3 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v3f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v6, v0, v3
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, v1, v4
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, v2, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v3f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v6, v0, v3
-; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT: v_min_f32_e32 v3, v1, v4
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX940-NEXT: v_min_f32_e32 v3, v2, v5
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v6, v0, v3
+; GFX900-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX900-NEXT: v_min_f32_e32 v3, v1, v4
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX900-NEXT: v_min_f32_e32 v3, v2, v5
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v6, v0, v3
+; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX950-NEXT: v_min_f32_e32 v3, v1, v4
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX950-NEXT: v_min_f32_e32 v3, v2, v5
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f32:
; GFX10: ; %bb.0:
@@ -1028,14 +1003,6 @@ define <3 x float> @v_minimum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1)
; GFX9-NEXT: v_min_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v3f32__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX940-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX940-NEXT: v_min_f32_e32 v2, v2, v5
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v3f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1097,38 +1064,38 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v3f32__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v6, v0, v3
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, v1, v4
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_min_f32_e32 v3, v2, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v3f32__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v6, v0, v3
-; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT: v_min_f32_e32 v3, v1, v4
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX940-NEXT: v_min_f32_e32 v3, v2, v5
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f32__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v6, v0, v3
+; GFX900-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX900-NEXT: v_min_f32_e32 v3, v1, v4
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX900-NEXT: v_min_f32_e32 v3, v2, v5
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f32__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v6, v0, v3
+; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
+; GFX950-NEXT: v_min_f32_e32 v3, v1, v4
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX950-NEXT: v_min_f32_e32 v3, v2, v5
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f32__nsz:
; GFX10: ; %bb.0:
@@ -1198,14 +1165,6 @@ define <3 x float> @v_minimum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr
; GFX9-NEXT: v_min_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v3f32__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX940-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX940-NEXT: v_min_f32_e32 v2, v2, v5
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v3f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1273,45 +1232,45 @@ define <4 x float> @v_minimum_v4f32(<4 x float> %src0, <4 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v4f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v8, v0, v4
-; GFX9-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX9-NEXT: v_min_f32_e32 v4, v1, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX9-NEXT: v_min_f32_e32 v4, v2, v6
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX9-NEXT: v_min_f32_e32 v4, v3, v7
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v4f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v8, v0, v4
-; GFX940-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX940-NEXT: v_min_f32_e32 v4, v1, v5
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX940-NEXT: v_min_f32_e32 v4, v2, v6
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX940-NEXT: v_min_f32_e32 v4, v3, v7
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v8, v0, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX900-NEXT: v_min_f32_e32 v4, v1, v5
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX900-NEXT: v_min_f32_e32 v4, v2, v6
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX900-NEXT: v_min_f32_e32 v4, v3, v7
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v8, v0, v4
+; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX950-NEXT: v_min_f32_e32 v4, v1, v5
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX950-NEXT: v_min_f32_e32 v4, v2, v6
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX950-NEXT: v_min_f32_e32 v4, v3, v7
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f32:
; GFX10: ; %bb.0:
@@ -1391,15 +1350,6 @@ define <4 x float> @v_minimum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1)
; GFX9-NEXT: v_min_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v4f32__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX940-NEXT: v_min_f32_e32 v1, v1, v5
-; GFX940-NEXT: v_min_f32_e32 v2, v2, v6
-; GFX940-NEXT: v_min_f32_e32 v3, v3, v7
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v4f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1469,45 +1419,45 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v4f32__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v8, v0, v4
-; GFX9-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX9-NEXT: v_min_f32_e32 v4, v1, v5
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX9-NEXT: v_min_f32_e32 v4, v2, v6
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX9-NEXT: v_min_f32_e32 v4, v3, v7
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v4f32__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v8, v0, v4
-; GFX940-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX940-NEXT: v_min_f32_e32 v4, v1, v5
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX940-NEXT: v_min_f32_e32 v4, v2, v6
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX940-NEXT: v_min_f32_e32 v4, v3, v7
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f32__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v8, v0, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX900-NEXT: v_min_f32_e32 v4, v1, v5
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX900-NEXT: v_min_f32_e32 v4, v2, v6
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX900-NEXT: v_min_f32_e32 v4, v3, v7
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f32__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v8, v0, v4
+; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
+; GFX950-NEXT: v_min_f32_e32 v4, v1, v5
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX950-NEXT: v_min_f32_e32 v4, v2, v6
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX950-NEXT: v_min_f32_e32 v4, v3, v7
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f32__nsz:
; GFX10: ; %bb.0:
@@ -1587,15 +1537,6 @@ define <4 x float> @v_minimum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr
; GFX9-NEXT: v_min_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v4f32__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX940-NEXT: v_min_f32_e32 v1, v1, v5
-; GFX940-NEXT: v_min_f32_e32 v2, v2, v6
-; GFX940-NEXT: v_min_f32_e32 v3, v3, v7
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v4f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1689,73 +1630,73 @@ define <8 x float> @v_minimum_v8f32(<8 x float> %src0, <8 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v8f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v16, v0, v8
-; GFX9-NEXT: v_mov_b32_e32 v17, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX9-NEXT: v_min_f32_e32 v8, v1, v9
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
-; GFX9-NEXT: v_min_f32_e32 v8, v2, v10
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
-; GFX9-NEXT: v_min_f32_e32 v8, v3, v11
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
-; GFX9-NEXT: v_min_f32_e32 v8, v4, v12
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
-; GFX9-NEXT: v_min_f32_e32 v8, v5, v13
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
-; GFX9-NEXT: v_min_f32_e32 v8, v6, v14
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
-; GFX9-NEXT: v_min_f32_e32 v8, v7, v15
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v8f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v16, v0, v8
-; GFX940-NEXT: v_mov_b32_e32 v17, 0x7fc00000
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX940-NEXT: v_min_f32_e32 v8, v1, v9
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
-; GFX940-NEXT: v_min_f32_e32 v8, v2, v10
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
-; GFX940-NEXT: v_min_f32_e32 v8, v3, v11
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
-; GFX940-NEXT: v_min_f32_e32 v8, v4, v12
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
-; GFX940-NEXT: v_min_f32_e32 v8, v5, v13
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
-; GFX940-NEXT: v_min_f32_e32 v8, v6, v14
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
-; GFX940-NEXT: v_min_f32_e32 v8, v7, v15
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v8f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v16, v0, v8
+; GFX900-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX900-NEXT: v_min_f32_e32 v8, v1, v9
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX900-NEXT: v_min_f32_e32 v8, v2, v10
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX900-NEXT: v_min_f32_e32 v8, v3, v11
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX900-NEXT: v_min_f32_e32 v8, v4, v12
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
+; GFX900-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX900-NEXT: v_min_f32_e32 v8, v5, v13
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
+; GFX900-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX900-NEXT: v_min_f32_e32 v8, v6, v14
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
+; GFX900-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX900-NEXT: v_min_f32_e32 v8, v7, v15
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
+; GFX900-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v8f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v16, v0, v8
+; GFX950-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
+; GFX950-NEXT: v_min_f32_e32 v8, v1, v9
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX950-NEXT: v_min_f32_e32 v8, v2, v10
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX950-NEXT: v_min_f32_e32 v8, v3, v11
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX950-NEXT: v_min_f32_e32 v8, v4, v12
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX950-NEXT: v_min_f32_e32 v8, v5, v13
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX950-NEXT: v_min_f32_e32 v8, v6, v14
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX950-NEXT: v_min_f32_e32 v8, v7, v15
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v8f32:
; GFX10: ; %bb.0:
@@ -1968,136 +1909,136 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v16f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v16
-; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32
-; GFX9-NEXT: v_writelane_b32 v31, s30, 0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX9-NEXT: v_min_f32_e32 v1, v1, v17
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
-; GFX9-NEXT: v_min_f32_e32 v2, v2, v18
-; GFX9-NEXT: v_mov_b32_e32 v17, 0x7fc00000
-; GFX9-NEXT: v_min_f32_e32 v18, v13, v29
-; GFX9-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
-; GFX9-NEXT: v_writelane_b32 v31, s31, 1
-; GFX9-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
-; GFX9-NEXT: v_min_f32_e32 v3, v3, v19
-; GFX9-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
-; GFX9-NEXT: v_min_f32_e32 v4, v4, v20
-; GFX9-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
-; GFX9-NEXT: v_min_f32_e32 v5, v5, v21
-; GFX9-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
-; GFX9-NEXT: v_min_f32_e32 v6, v6, v22
-; GFX9-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
-; GFX9-NEXT: v_min_f32_e32 v7, v7, v23
-; GFX9-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
-; GFX9-NEXT: v_min_f32_e32 v8, v8, v24
-; GFX9-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
-; GFX9-NEXT: v_min_f32_e32 v9, v9, v25
-; GFX9-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
-; GFX9-NEXT: v_min_f32_e32 v10, v10, v26
-; GFX9-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
-; GFX9-NEXT: v_min_f32_e32 v11, v11, v27
-; GFX9-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
-; GFX9-NEXT: v_min_f32_e32 v12, v12, v28
-; GFX9-NEXT: v_min_f32_e32 v19, v14, v30
-; GFX9-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
-; GFX9-NEXT: v_readlane_b32 s31, v31, 1
-; GFX9-NEXT: v_readlane_b32 s30, v31, 0
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v18, v15, v16
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v16f32:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: v_mov_b32_e32 v32, 0x7fc00000
-; GFX940-NEXT: v_min_f32_e32 v33, v0, v16
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX940-NEXT: v_min_f32_e32 v34, v1, v17
-; GFX940-NEXT: v_min_f32_e32 v35, v2, v18
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX940-NEXT: v_min_f32_e32 v36, v3, v19
-; GFX940-NEXT: v_min_f32_e32 v37, v4, v20
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX940-NEXT: v_min_f32_e32 v38, v5, v21
-; GFX940-NEXT: v_min_f32_e32 v39, v6, v22
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX940-NEXT: v_min_f32_e32 v48, v7, v23
-; GFX940-NEXT: v_min_f32_e32 v49, v8, v24
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX940-NEXT: v_min_f32_e32 v50, v9, v25
-; GFX940-NEXT: v_min_f32_e32 v51, v10, v26
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX940-NEXT: v_min_f32_e32 v52, v11, v27
-; GFX940-NEXT: v_min_f32_e32 v53, v12, v28
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX940-NEXT: v_min_f32_e32 v54, v13, v29
-; GFX940-NEXT: v_min_f32_e32 v55, v14, v30
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_min_f32_e32 v16, v15, v31
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc
-; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v15, v31
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v16f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v16
+; GFX900-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX900-NEXT: v_writelane_b32 v31, s30, 0
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
+; GFX900-NEXT: v_min_f32_e32 v1, v1, v17
+; GFX900-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
+; GFX900-NEXT: v_min_f32_e32 v2, v2, v18
+; GFX900-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX900-NEXT: v_min_f32_e32 v18, v13, v29
+; GFX900-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
+; GFX900-NEXT: v_writelane_b32 v31, s31, 1
+; GFX900-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
+; GFX900-NEXT: v_min_f32_e32 v3, v3, v19
+; GFX900-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
+; GFX900-NEXT: v_min_f32_e32 v4, v4, v20
+; GFX900-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
+; GFX900-NEXT: v_min_f32_e32 v5, v5, v21
+; GFX900-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
+; GFX900-NEXT: v_min_f32_e32 v6, v6, v22
+; GFX900-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
+; GFX900-NEXT: v_min_f32_e32 v7, v7, v23
+; GFX900-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
+; GFX900-NEXT: v_min_f32_e32 v8, v8, v24
+; GFX900-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
+; GFX900-NEXT: v_min_f32_e32 v9, v9, v25
+; GFX900-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
+; GFX900-NEXT: v_min_f32_e32 v10, v10, v26
+; GFX900-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
+; GFX900-NEXT: v_min_f32_e32 v11, v11, v27
+; GFX900-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
+; GFX900-NEXT: v_min_f32_e32 v12, v12, v28
+; GFX900-NEXT: v_min_f32_e32 v19, v14, v30
+; GFX900-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
+; GFX900-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
+; GFX900-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
+; GFX900-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
+; GFX900-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
+; GFX900-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
+; GFX900-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
+; GFX900-NEXT: v_readlane_b32 s31, v31, 1
+; GFX900-NEXT: v_readlane_b32 s30, v31, 0
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v18, v15, v16
+; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
+; GFX900-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v16f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: scratch_load_dword v31, off, s32
+; GFX950-NEXT: v_mov_b32_e32 v32, 0x7fc00000
+; GFX950-NEXT: v_min_f32_e32 v33, v0, v16
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
+; GFX950-NEXT: v_min_f32_e32 v34, v1, v17
+; GFX950-NEXT: v_min_f32_e32 v35, v2, v18
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
+; GFX950-NEXT: v_min_f32_e32 v36, v3, v19
+; GFX950-NEXT: v_min_f32_e32 v37, v4, v20
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
+; GFX950-NEXT: v_min_f32_e32 v38, v5, v21
+; GFX950-NEXT: v_min_f32_e32 v39, v6, v22
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
+; GFX950-NEXT: v_min_f32_e32 v48, v7, v23
+; GFX950-NEXT: v_min_f32_e32 v49, v8, v24
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
+; GFX950-NEXT: v_min_f32_e32 v50, v9, v25
+; GFX950-NEXT: v_min_f32_e32 v51, v10, v26
+; GFX950-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
+; GFX950-NEXT: v_min_f32_e32 v52, v11, v27
+; GFX950-NEXT: v_min_f32_e32 v53, v12, v28
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
+; GFX950-NEXT: v_min_f32_e32 v54, v13, v29
+; GFX950-NEXT: v_min_f32_e32 v55, v14, v30
+; GFX950-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v16, v15, v31
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc
+; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v15, v31
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v16f32:
; GFX10: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
index 567582c9f58ff2..71fdd691a15122 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
@@ -2,7 +2,8 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
@@ -28,26 +29,26 @@ define double @v_minimum_f64(double %src0, double %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f64:
; GFX10: ; %bb.0:
@@ -100,12 +101,6 @@ define double @v_minimum_f64__nnan(double %src0, double %src1) {
; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_f64__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -152,26 +147,26 @@ define double @v_minimum_f64__nsz(double %src0, double %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f64__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f64__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f64__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f64__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f64__nsz:
; GFX10: ; %bb.0:
@@ -224,12 +219,6 @@ define double @v_minimum_f64__nnan_nsz(double %src0, double %src1) {
; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_f64__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -278,28 +267,28 @@ define double @v_minimum_f64__nnan_src0(double %arg0, double %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f64__nnan_src0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
-; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f64__nnan_src0:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
-; GFX940-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f64__nnan_src0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
+; GFX900-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f64__nnan_src0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
+; GFX950-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f64__nnan_src0:
; GFX10: ; %bb.0:
@@ -362,28 +351,28 @@ define double @v_minimum_f64__nnan_src1(double %src0, double %arg1) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_f64__nnan_src1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
-; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_f64__nnan_src1:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
-; GFX940-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
-; GFX940-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_f64__nnan_src1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
+; GFX900-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_f64__nnan_src1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
+; GFX950-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f64__nnan_src1:
; GFX10: ; %bb.0:
@@ -454,35 +443,35 @@ define void @s_minimum_f64(double inreg %src0, double inreg %src1) {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_minimum_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s18
-; GFX9-NEXT: v_mov_b32_e32 v1, s19
-; GFX9-NEXT: v_min_f64 v[2:3], s[16:17], v[0:1]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v[0:1]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_minimum_f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
-; GFX940-NEXT: v_min_f64 v[2:3], s[0:1], v[0:1]
-; GFX940-NEXT: v_mov_b32_e32 v4, 0x7ff80000
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v[0:1]
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_minimum_f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s18
+; GFX900-NEXT: v_mov_b32_e32 v1, s19
+; GFX900-NEXT: v_min_f64 v[2:3], s[16:17], v[0:1]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1]
+; GFX900-NEXT: v_mov_b32_e32 v4, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v[0:1]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_minimum_f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX950-NEXT: v_min_f64 v[2:3], s[0:1], v[0:1]
+; GFX950-NEXT: v_mov_b32_e32 v4, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v[0:1]
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_minimum_f64:
; GFX10: ; %bb.0:
@@ -555,35 +544,35 @@ define <2 x double> @v_minimum_v2f64(<2 x double> %src0, <2 x double> %src1) {
; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v2f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v2f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX900-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64:
; GFX10: ; %bb.0:
@@ -648,13 +637,6 @@ define <2 x double> @v_minimum_v2f64__nnan(<2 x double> %src0, <2 x double> %src
; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v2f64__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v2f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -712,35 +694,35 @@ define <2 x double> @v_minimum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v2f64__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v2f64__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f64__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX900-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX900-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f64__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64__nsz:
; GFX10: ; %bb.0:
@@ -805,13 +787,6 @@ define <2 x double> @v_minimum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double>
; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v2f64__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -883,46 +858,46 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: s_minimum_v2f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s22
-; GFX9-NEXT: v_mov_b32_e32 v4, s20
-; GFX9-NEXT: v_mov_b32_e32 v1, s23
-; GFX9-NEXT: v_mov_b32_e32 v5, s21
-; GFX9-NEXT: v_min_f64 v[2:3], s[18:19], v[0:1]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[18:19], v[0:1]
-; GFX9-NEXT: v_min_f64 v[0:1], s[16:17], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], s[16:17], v[4:5]
-; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v[0:3]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: s_minimum_v2f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[18:19]
-; GFX940-NEXT: v_min_f64 v[2:3], s[2:3], v[0:1]
-; GFX940-NEXT: v_mov_b32_e32 v6, 0x7ff80000
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[2:3], v[0:1]
-; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[16:17]
-; GFX940-NEXT: v_min_f64 v[4:5], s[0:1], v[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use v[0:3]
-; GFX940-NEXT: ;;#ASMEND
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_minimum_v2f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s22
+; GFX900-NEXT: v_mov_b32_e32 v4, s20
+; GFX900-NEXT: v_mov_b32_e32 v1, s23
+; GFX900-NEXT: v_mov_b32_e32 v5, s21
+; GFX900-NEXT: v_min_f64 v[2:3], s[18:19], v[0:1]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, s[18:19], v[0:1]
+; GFX900-NEXT: v_min_f64 v[0:1], s[16:17], v[4:5]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], s[16:17], v[4:5]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: s_minimum_v2f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[18:19]
+; GFX950-NEXT: v_min_f64 v[2:3], s[2:3], v[0:1]
+; GFX950-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[2:3], v[0:1]
+; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[16:17]
+; GFX950-NEXT: v_min_f64 v[4:5], s[0:1], v[0:1]
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX950-NEXT: ;;#ASMSTART
+; GFX950-NEXT: ; use v[0:3]
+; GFX950-NEXT: ;;#ASMEND
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_minimum_v2f64:
; GFX10: ; %bb.0:
@@ -1012,44 +987,44 @@ define <3 x double> @v_minimum_v3f64(<3 x double> %src0, <3 x double> %src1) {
; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v3f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX9-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v3f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v12, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX940-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX900-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX900-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX950-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX950-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64:
; GFX10: ; %bb.0:
@@ -1125,14 +1100,6 @@ define <3 x double> @v_minimum_v3f64__nnan(<3 x double> %src0, <3 x double> %src
; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v3f64__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX940-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v3f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1201,44 +1168,44 @@ define <3 x double> @v_minimum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v3f64__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX9-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v3f64__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v12, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX940-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f64__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX900-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX900-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f64__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX950-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX950-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64__nsz:
; GFX10: ; %bb.0:
@@ -1314,14 +1281,6 @@ define <3 x double> @v_minimum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double>
; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v3f64__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX940-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1398,53 +1357,53 @@ define <4 x double> @v_minimum_v4f64(<4 x double> %src0, <4 x double> %src1) {
; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v4f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX9-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX9-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v4f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v16, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX940-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX940-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX900-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX900-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX900-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX900-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX950-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX950-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX950-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64:
; GFX10: ; %bb.0:
@@ -1532,15 +1491,6 @@ define <4 x double> @v_minimum_v4f64__nnan(<4 x double> %src0, <4 x double> %src
; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v4f64__nnan:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
-; GFX940-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
-; GFX940-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v4f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1620,53 +1570,53 @@ define <4 x double> @v_minimum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v4f64__nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX9-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX9-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v4f64__nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX940-NEXT: v_mov_b32_e32 v16, 0x7ff80000
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX940-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX940-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f64__nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX900-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX900-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX900-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX900-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f64__nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX950-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX950-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX950-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64__nsz:
; GFX10: ; %bb.0:
@@ -1754,15 +1704,6 @@ define <4 x double> @v_minimum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double>
; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX940-LABEL: v_minimum_v4f64__nnan_nsz:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
-; GFX940-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
-; GFX940-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1878,89 +1819,89 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) {
; GFX8-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v8f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX9-NEXT: v_min_f64 v[32:33], v[2:3], v[18:19]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
-; GFX9-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
-; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[16:17]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
-; GFX9-NEXT: v_mov_b32_e32 v34, 0x7ff80000
-; GFX9-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
-; GFX9-NEXT: v_min_f64 v[16:17], v[8:9], v[24:25]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
-; GFX9-NEXT: v_min_f64 v[22:23], v[10:11], v[26:27]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
-; GFX9-NEXT: v_min_f64 v[24:25], v[12:13], v[28:29]
-; GFX9-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_min_f64 v[18:19], v[14:15], v[30:31]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v8f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: v_mov_b32_e32 v54, 0x7ff80000
-; GFX940-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX940-NEXT: v_min_f64 v[34:35], v[2:3], v[18:19]
-; GFX940-NEXT: v_min_f64 v[36:37], v[4:5], v[20:21]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
-; GFX940-NEXT: v_min_f64 v[38:39], v[6:7], v[22:23]
-; GFX940-NEXT: v_min_f64 v[48:49], v[8:9], v[24:25]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
-; GFX940-NEXT: v_min_f64 v[50:51], v[10:11], v[26:27]
-; GFX940-NEXT: v_min_f64 v[52:53], v[12:13], v[28:29]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_min_f64 v[16:17], v[14:15], v[30:31]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v8f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX900-NEXT: v_min_f64 v[32:33], v[2:3], v[18:19]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX900-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
+; GFX900-NEXT: v_min_f64 v[2:3], v[0:1], v[16:17]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v34, 0x7ff80000
+; GFX900-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
+; GFX900-NEXT: v_min_f64 v[16:17], v[8:9], v[24:25]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX900-NEXT: v_min_f64 v[22:23], v[10:11], v[26:27]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX900-NEXT: v_min_f64 v[24:25], v[12:13], v[28:29]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
+; GFX900-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
+; GFX900-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
+; GFX900-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
+; GFX900-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
+; GFX900-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_min_f64 v[18:19], v[14:15], v[30:31]
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v8f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: scratch_load_dword v31, off, s32
+; GFX950-NEXT: v_mov_b32_e32 v54, 0x7ff80000
+; GFX950-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX950-NEXT: v_min_f64 v[34:35], v[2:3], v[18:19]
+; GFX950-NEXT: v_min_f64 v[36:37], v[4:5], v[20:21]
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX950-NEXT: v_min_f64 v[38:39], v[6:7], v[22:23]
+; GFX950-NEXT: v_min_f64 v[48:49], v[8:9], v[24:25]
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
+; GFX950-NEXT: v_min_f64 v[50:51], v[10:11], v[26:27]
+; GFX950-NEXT: v_min_f64 v[52:53], v[12:13], v[28:29]
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_min_f64 v[16:17], v[14:15], v[30:31]
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v8f64:
; GFX10: ; %bb.0:
@@ -2332,295 +2273,295 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v16f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX9-NEXT: v_writelane_b32 v34, s30, 0
-; GFX9-NEXT: v_writelane_b32 v34, s31, 1
-; GFX9-NEXT: v_writelane_b32 v34, s34, 2
-; GFX9-NEXT: v_writelane_b32 v34, s35, 3
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
-; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
-; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[31:32]
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
-; GFX9-NEXT: v_min_f64 v[8:9], v[8:9], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
-; GFX9-NEXT: v_min_f64 v[10:11], v[10:11], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
-; GFX9-NEXT: v_min_f64 v[12:13], v[12:13], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
-; GFX9-NEXT: v_min_f64 v[14:15], v[14:15], v[31:32]
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
-; GFX9-NEXT: v_min_f64 v[16:17], v[16:17], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
-; GFX9-NEXT: v_min_f64 v[18:19], v[18:19], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX9-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
-; GFX9-NEXT: v_min_f64 v[20:21], v[20:21], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
-; GFX9-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
-; GFX9-NEXT: v_min_f64 v[22:23], v[22:23], v[31:32]
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
-; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
-; GFX9-NEXT: v_min_f64 v[24:25], v[24:25], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
-; GFX9-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
-; GFX9-NEXT: v_min_f64 v[26:27], v[26:27], v[31:32]
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
-; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
-; GFX9-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32]
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
-; GFX9-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33]
-; GFX9-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
-; GFX9-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
-; GFX9-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
-; GFX9-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
-; GFX9-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX9-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
-; GFX9-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
-; GFX9-NEXT: v_readlane_b32 s35, v34, 3
-; GFX9-NEXT: v_readlane_b32 s34, v34, 2
-; GFX9-NEXT: v_readlane_b32 s31, v34, 1
-; GFX9-NEXT: v_readlane_b32 s30, v34, 0
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: v_minimum_v16f64:
-; GFX940: ; %bb.0:
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_accvgpr_write_b32 a1, v40 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a2, v41 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a3, v42 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a4, v43 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a5, v44 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a6, v45 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a7, v46 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a8, v47 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a9, v56 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a10, v57 ; Reload Reuse
-; GFX940-NEXT: scratch_load_dword v37, off, s32 offset:16
-; GFX940-NEXT: scratch_load_dword v36, off, s32 offset:12
-; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:24
-; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:20
-; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:32
-; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:28
-; GFX940-NEXT: scratch_load_dword v57, off, s32 offset:8
-; GFX940-NEXT: scratch_load_dword v56, off, s32 offset:4
-; GFX940-NEXT: scratch_load_dword v47, off, s32 offset:40
-; GFX940-NEXT: scratch_load_dword v46, off, s32 offset:36
-; GFX940-NEXT: scratch_load_dword v45, off, s32 offset:48
-; GFX940-NEXT: scratch_load_dword v44, off, s32 offset:44
-; GFX940-NEXT: scratch_load_dword v43, off, s32 offset:56
-; GFX940-NEXT: scratch_load_dword v42, off, s32 offset:52
-; GFX940-NEXT: scratch_load_dword v41, off, s32 offset:64
-; GFX940-NEXT: scratch_load_dword v40, off, s32 offset:60
-; GFX940-NEXT: scratch_load_dword v55, off, s32 offset:72
-; GFX940-NEXT: scratch_load_dword v54, off, s32 offset:68
-; GFX940-NEXT: scratch_load_dword v53, off, s32 offset:80
-; GFX940-NEXT: scratch_load_dword v52, off, s32 offset:76
-; GFX940-NEXT: scratch_load_dword v51, off, s32 offset:88
-; GFX940-NEXT: scratch_load_dword v50, off, s32 offset:84
-; GFX940-NEXT: scratch_load_dword v35, off, s32 offset:96
-; GFX940-NEXT: scratch_load_dword v34, off, s32 offset:92
-; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: scratch_load_dword v33, off, s32 offset:104
-; GFX940-NEXT: scratch_load_dword v32, off, s32 offset:100
-; GFX940-NEXT: v_accvgpr_write_b32 a11, v58 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a12, v59 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a13, v60 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a14, v61 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a15, v62 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a16, v63 ; Reload Reuse
-; GFX940-NEXT: s_waitcnt vmcnt(25)
-; GFX940-NEXT: v_min_f64 v[58:59], v[2:3], v[36:37]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[36:37]
-; GFX940-NEXT: scratch_load_dword v37, off, s32 offset:112
-; GFX940-NEXT: scratch_load_dword v36, off, s32 offset:108
-; GFX940-NEXT: s_waitcnt vmcnt(25)
-; GFX940-NEXT: v_min_f64 v[60:61], v[4:5], v[38:39]
-; GFX940-NEXT: v_cmp_u_f64_e64 s[0:1], v[4:5], v[38:39]
-; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:120
-; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:116
-; GFX940-NEXT: s_waitcnt vmcnt(25)
-; GFX940-NEXT: v_min_f64 v[62:63], v[6:7], v[48:49]
-; GFX940-NEXT: v_cmp_u_f64_e64 s[2:3], v[6:7], v[48:49]
-; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:128
-; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:124
-; GFX940-NEXT: s_waitcnt vmcnt(25)
-; GFX940-NEXT: v_min_f64 v[2:3], v[0:1], v[56:57]
-; GFX940-NEXT: v_cmp_u_f64_e64 s[4:5], v[0:1], v[56:57]
-; GFX940-NEXT: v_mov_b32_e32 v0, 0x7ff80000
-; GFX940-NEXT: s_waitcnt vmcnt(23)
-; GFX940-NEXT: v_min_f64 v[56:57], v[8:9], v[46:47]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v2, 0, s[4:5]
-; GFX940-NEXT: v_accvgpr_write_b32 a0, v1
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v3, v0, s[4:5]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v58, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v59, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
-; GFX940-NEXT: s_waitcnt vmcnt(21)
-; GFX940-NEXT: v_min_f64 v[46:47], v[10:11], v[44:45]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v60, 0, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v57, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
-; GFX940-NEXT: s_waitcnt vmcnt(19)
-; GFX940-NEXT: v_min_f64 v[44:45], v[12:13], v[42:43]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v61, v0, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v46, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v47, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
-; GFX940-NEXT: s_waitcnt vmcnt(17)
-; GFX940-NEXT: v_min_f64 v[42:43], v[14:15], v[40:41]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v62, 0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v44, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v45, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
-; GFX940-NEXT: s_waitcnt vmcnt(15)
-; GFX940-NEXT: v_min_f64 v[40:41], v[16:17], v[54:55]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v63, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v42, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v43, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
-; GFX940-NEXT: s_waitcnt vmcnt(13)
-; GFX940-NEXT: v_min_f64 v[54:55], v[18:19], v[52:53]
-; GFX940-NEXT: v_accvgpr_read_b32 v63, a16 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v16, v40, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v17, v41, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
-; GFX940-NEXT: s_waitcnt vmcnt(11)
-; GFX940-NEXT: v_min_f64 v[52:53], v[20:21], v[50:51]
-; GFX940-NEXT: v_accvgpr_read_b32 v62, a15 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v18, v54, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v19, v55, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
-; GFX940-NEXT: s_waitcnt vmcnt(9)
-; GFX940-NEXT: v_min_f64 v[50:51], v[22:23], v[34:35]
-; GFX940-NEXT: v_accvgpr_read_b32 v61, a14 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v20, v52, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v21, v53, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[34:35]
-; GFX940-NEXT: s_waitcnt vmcnt(6)
-; GFX940-NEXT: v_min_f64 v[34:35], v[24:25], v[32:33]
-; GFX940-NEXT: v_accvgpr_read_b32 v60, a13 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v22, v50, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v23, v51, v0, vcc
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[32:33]
-; GFX940-NEXT: v_accvgpr_read_b32 v59, a12 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v58, a11 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v24, v34, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v25, v35, v0, vcc
-; GFX940-NEXT: v_accvgpr_read_b32 v57, a10 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v56, a9 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v47, a8 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v46, a7 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v45, a6 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v44, a5 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v43, a4 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v42, a3 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v41, a2 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v40, a1 ; Reload Reuse
-; GFX940-NEXT: s_waitcnt vmcnt(4)
-; GFX940-NEXT: v_min_f64 v[32:33], v[26:27], v[36:37]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[36:37]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v26, v32, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v27, v33, v0, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(2)
-; GFX940-NEXT: v_min_f64 v[32:33], v[28:29], v[38:39]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[38:39]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v28, v32, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v29, v33, v0, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_min_f64 v[32:33], v[30:31], v[48:49]
-; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[48:49]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v30, v32, 0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v31, v33, v0, vcc
-; GFX940-NEXT: v_accvgpr_read_b32 v0, a0
-; GFX940-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v16f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
+; GFX900-NEXT: v_writelane_b32 v34, s30, 0
+; GFX900-NEXT: v_writelane_b32 v34, s31, 1
+; GFX900-NEXT: v_writelane_b32 v34, s34, 2
+; GFX900-NEXT: v_writelane_b32 v34, s35, 3
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX900-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX900-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX900-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX900-NEXT: v_min_f64 v[6:7], v[6:7], v[31:32]
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
+; GFX900-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX900-NEXT: v_min_f64 v[8:9], v[8:9], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX900-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX900-NEXT: v_min_f64 v[10:11], v[10:11], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
+; GFX900-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX900-NEXT: v_min_f64 v[12:13], v[12:13], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX900-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX900-NEXT: v_min_f64 v[14:15], v[14:15], v[31:32]
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
+; GFX900-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX900-NEXT: v_min_f64 v[16:17], v[16:17], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX900-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX900-NEXT: v_min_f64 v[18:19], v[18:19], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
+; GFX900-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX900-NEXT: v_min_f64 v[20:21], v[20:21], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX900-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX900-NEXT: v_min_f64 v[22:23], v[22:23], v[31:32]
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
+; GFX900-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX900-NEXT: v_min_f64 v[24:25], v[24:25], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX900-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX900-NEXT: v_min_f64 v[26:27], v[26:27], v[31:32]
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX900-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX900-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32]
+; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX900-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
+; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
+; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX900-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33]
+; GFX900-NEXT: v_mov_b32_e32 v32, 0x7ff80000
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX900-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
+; GFX900-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
+; GFX900-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
+; GFX900-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
+; GFX900-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
+; GFX900-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
+; GFX900-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
+; GFX900-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
+; GFX900-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
+; GFX900-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
+; GFX900-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
+; GFX900-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
+; GFX900-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
+; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
+; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
+; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
+; GFX900-NEXT: v_readlane_b32 s35, v34, 3
+; GFX900-NEXT: v_readlane_b32 s34, v34, 2
+; GFX900-NEXT: v_readlane_b32 s31, v34, 1
+; GFX900-NEXT: v_readlane_b32 s30, v34, 0
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v16f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_accvgpr_write_b32 a1, v40 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a2, v41 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a3, v42 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a4, v43 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a5, v44 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a6, v45 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a7, v46 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a8, v47 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a9, v56 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a10, v57 ; Reload Reuse
+; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:16
+; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:12
+; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:24
+; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:20
+; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:32
+; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:28
+; GFX950-NEXT: scratch_load_dword v57, off, s32 offset:8
+; GFX950-NEXT: scratch_load_dword v56, off, s32 offset:4
+; GFX950-NEXT: scratch_load_dword v47, off, s32 offset:40
+; GFX950-NEXT: scratch_load_dword v46, off, s32 offset:36
+; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:48
+; GFX950-NEXT: scratch_load_dword v44, off, s32 offset:44
+; GFX950-NEXT: scratch_load_dword v43, off, s32 offset:56
+; GFX950-NEXT: scratch_load_dword v42, off, s32 offset:52
+; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:64
+; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:60
+; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:72
+; GFX950-NEXT: scratch_load_dword v54, off, s32 offset:68
+; GFX950-NEXT: scratch_load_dword v53, off, s32 offset:80
+; GFX950-NEXT: scratch_load_dword v52, off, s32 offset:76
+; GFX950-NEXT: scratch_load_dword v51, off, s32 offset:88
+; GFX950-NEXT: scratch_load_dword v50, off, s32 offset:84
+; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:96
+; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:92
+; GFX950-NEXT: scratch_load_dword v31, off, s32
+; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:104
+; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:100
+; GFX950-NEXT: v_accvgpr_write_b32 a11, v58 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a12, v59 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a13, v60 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a14, v61 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a15, v62 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a16, v63 ; Reload Reuse
+; GFX950-NEXT: s_waitcnt vmcnt(25)
+; GFX950-NEXT: v_min_f64 v[58:59], v[2:3], v[36:37]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[36:37]
+; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:112
+; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:108
+; GFX950-NEXT: s_waitcnt vmcnt(25)
+; GFX950-NEXT: v_min_f64 v[60:61], v[4:5], v[38:39]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[0:1], v[4:5], v[38:39]
+; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:120
+; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:116
+; GFX950-NEXT: s_waitcnt vmcnt(25)
+; GFX950-NEXT: v_min_f64 v[62:63], v[6:7], v[48:49]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[2:3], v[6:7], v[48:49]
+; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:128
+; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:124
+; GFX950-NEXT: s_waitcnt vmcnt(25)
+; GFX950-NEXT: v_min_f64 v[2:3], v[0:1], v[56:57]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[4:5], v[0:1], v[56:57]
+; GFX950-NEXT: v_mov_b32_e32 v0, 0x7ff80000
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_min_f64 v[56:57], v[8:9], v[46:47]
+; GFX950-NEXT: v_cndmask_b32_e64 v1, v2, 0, s[4:5]
+; GFX950-NEXT: v_accvgpr_write_b32 a0, v1
+; GFX950-NEXT: v_cndmask_b32_e64 v1, v3, v0, s[4:5]
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v58, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v59, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
+; GFX950-NEXT: s_waitcnt vmcnt(21)
+; GFX950-NEXT: v_min_f64 v[46:47], v[10:11], v[44:45]
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v60, 0, s[0:1]
+; GFX950-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v57, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
+; GFX950-NEXT: s_waitcnt vmcnt(19)
+; GFX950-NEXT: v_min_f64 v[44:45], v[12:13], v[42:43]
+; GFX950-NEXT: v_cndmask_b32_e64 v5, v61, v0, s[0:1]
+; GFX950-NEXT: v_cndmask_b32_e64 v10, v46, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v47, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
+; GFX950-NEXT: s_waitcnt vmcnt(17)
+; GFX950-NEXT: v_min_f64 v[42:43], v[14:15], v[40:41]
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v62, 0, s[2:3]
+; GFX950-NEXT: v_cndmask_b32_e64 v12, v44, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v45, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
+; GFX950-NEXT: s_waitcnt vmcnt(15)
+; GFX950-NEXT: v_min_f64 v[40:41], v[16:17], v[54:55]
+; GFX950-NEXT: v_cndmask_b32_e64 v7, v63, v0, s[2:3]
+; GFX950-NEXT: v_cndmask_b32_e64 v14, v42, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v43, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
+; GFX950-NEXT: s_waitcnt vmcnt(13)
+; GFX950-NEXT: v_min_f64 v[54:55], v[18:19], v[52:53]
+; GFX950-NEXT: v_accvgpr_read_b32 v63, a16 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v16, v40, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v17, v41, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
+; GFX950-NEXT: s_waitcnt vmcnt(11)
+; GFX950-NEXT: v_min_f64 v[52:53], v[20:21], v[50:51]
+; GFX950-NEXT: v_accvgpr_read_b32 v62, a15 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v18, v54, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v19, v55, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
+; GFX950-NEXT: s_waitcnt vmcnt(9)
+; GFX950-NEXT: v_min_f64 v[50:51], v[22:23], v[34:35]
+; GFX950-NEXT: v_accvgpr_read_b32 v61, a14 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v20, v52, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v21, v53, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[34:35]
+; GFX950-NEXT: s_waitcnt vmcnt(6)
+; GFX950-NEXT: v_min_f64 v[34:35], v[24:25], v[32:33]
+; GFX950-NEXT: v_accvgpr_read_b32 v60, a13 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v22, v50, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v23, v51, v0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[32:33]
+; GFX950-NEXT: v_accvgpr_read_b32 v59, a12 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v58, a11 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e64 v24, v34, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v25, v35, v0, vcc
+; GFX950-NEXT: v_accvgpr_read_b32 v57, a10 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v56, a9 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v47, a8 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v46, a7 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v45, a6 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v44, a5 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v43, a4 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v42, a3 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v41, a2 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v40, a1 ; Reload Reuse
+; GFX950-NEXT: s_waitcnt vmcnt(4)
+; GFX950-NEXT: v_min_f64 v[32:33], v[26:27], v[36:37]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[36:37]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v26, v32, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v27, v33, v0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(2)
+; GFX950-NEXT: v_min_f64 v[32:33], v[28:29], v[38:39]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[38:39]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v28, v32, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v29, v33, v0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_min_f64 v[32:33], v[30:31], v[48:49]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[48:49]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e64 v30, v32, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v31, v33, v0, vcc
+; GFX950-NEXT: v_accvgpr_read_b32 v0, a0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v16f64:
; GFX10: ; %bb.0:
diff --git a/llvm/test/MC/AMDGPU/flat-scratch-gfx940.s b/llvm/test/MC/AMDGPU/flat-scratch-gfx940.s
index fde3d2057b2ad1..d3ca4281dca414 100644
--- a/llvm/test/MC/AMDGPU/flat-scratch-gfx940.s
+++ b/llvm/test/MC/AMDGPU/flat-scratch-gfx940.s
@@ -1,4 +1,5 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck -check-prefix=GFX940 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck -check-prefix=GFX940 %s
scratch_load_dword a2, v4, s6
// GFX940: scratch_load_dword a2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x86,0x02]
diff --git a/llvm/test/MC/AMDGPU/gfx940_asm_features.s b/llvm/test/MC/AMDGPU/gfx940_asm_features.s
index e208b6cf903d38..e2e84f27b828a4 100644
--- a/llvm/test/MC/AMDGPU/gfx940_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx940_asm_features.s
@@ -1,4 +1,5 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck --check-prefix=GFX940 --strict-whitespace %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX940 --strict-whitespace %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=NOT-GFX940,GFX90A --implicit-check-not=error: %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=NOT-GFX940,GFX10 --implicit-check-not=error: %s
diff --git a/llvm/test/MC/AMDGPU/gfx950-unsupported.s b/llvm/test/MC/AMDGPU/gfx950-unsupported.s
new file mode 100644
index 00000000000000..f8bbd40b700fd8
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx950-unsupported.s
@@ -0,0 +1,179 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+//===----------------------------------------------------------------------===//
+// v_mfma_f32_32x32x4_xf32
+//===----------------------------------------------------------------------===//
+
+v_mfma_f32_32x32x4_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], v[0:3], 1.0
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], a[0:3], 1.0
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], v[0:3], 1.0
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], a[0:3], 1.0
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+
+//===----------------------------------------------------------------------===//
+// v_mfma_f32_16x16x8_xf32
+//===----------------------------------------------------------------------===//
+
+v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], v[0:3], 1.0
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], a[0:3], 1.0
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+
+v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], v[0:3], 1.0
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], a[0:3], 1.0
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx950_invalid_encoding.txt b/llvm/test/MC/AMDGPU/gfx950_invalid_encoding.txt
new file mode 100644
index 00000000000000..0697ee8661e76d
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx950_invalid_encoding.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -disassemble -arch=amdgcn -mcpu=gfx950 -show-encoding %s 2>&1 | FileCheck --implicit-check-not=warning: --check-prefix=GFX950 %s
+
+# GFX950: warning: invalid instruction encoding
+0x00,0x80,0xbe,0xd3,0x02,0x09,0x0a,0x04
+
+# GFX950: warning: invalid instruction encoding
+0x00,0x00,0xbe,0xd3,0x02,0x09,0x0a,0x04
+
+# GFX950: warning: invalid instruction encoding
+0x00,0x00,0xbf,0xd3,0x02,0x09,0x0a,0x04
+
+# GFX950: warning: invalid instruction encoding
+0x00,0x80,0xbf,0xd3,0x02,0x09,0x0a,0x04
\ No newline at end of file
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt
index 9575e50f16312f..63e425fdb4ec96 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt
@@ -1,4 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -disassemble -show-encoding %s | FileCheck -strict-whitespace --check-prefix=GFX940 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -disassemble -show-encoding %s | FileCheck -strict-whitespace --check-prefix=GFX940 %s
# GFX940: global_load_dword v2, v[2:3], off sc0 ; encoding: [0x00,0x80,0x51,0xdc,0x02,0x00,0x7f,0x02]
0x00,0x80,0x51,0xdc,0x02,0x00,0x7f,0x02
diff --git a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
index 9c79ea588f6247..416419b3a333f8 100644
--- a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
+++ b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
@@ -162,6 +162,10 @@
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX942 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX942 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX942 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX942 %s
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX950/' %s | yaml2obj -o %t.o.AMDGCN_GFX950
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX950 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX950 %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX950 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX950 %s
+
# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX1010/' %s | yaml2obj -o %t.o.AMDGCN_GFX1010
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1010 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1010 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX1010 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1010 %s
@@ -411,6 +415,9 @@
# ELF-AMDGCN-GFX942: EF_AMDGPU_MACH_AMDGCN_GFX942 (0x4C)
# YAML-AMDGCN-GFX942: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX942 ]
+# ELF-AMDGCN-GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F)
+# YAML-AMDGCN-GFX950: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX950 ]
+
# ELF-AMDGCN-GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
# YAML-AMDGCN-GFX1010: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1010 ]
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
index 45071ecb751321..475f6f6d8322c7 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
@@ -146,6 +146,11 @@ define amdgpu_kernel void @test_kernel() {
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx9-generic -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx9-generic %t.o > %t-specify.txt
; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+;
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx950 %t.o > %t-specify.txt
+; RUN: llvm-objdump -D %t.o > %t-detect.txt
+;
; RUN: diff %t-specify.txt %t-detect.txt
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=obj -O0 -o %t.o %s
diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
index 34c22dca3aa183..7de64a6edfe2e6 100644
--- a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
+++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
@@ -223,6 +223,15 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942 -DFLAG_VALUE=0x4C
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950 -DFLAG_VALUE=0x4F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950 -DFLAG_VALUE=0x4F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950 -DFLAG_VALUE=0x4F
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 -DFLAG_VALUE=0x33
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 1012cd020d525e..6360a169cbeda9 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1617,6 +1617,7 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX950, "gfx950"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 96cb79b7d071c5..c76ad018ab4fe7 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -43,7 +43,7 @@ set(include_directory ${devicertl_base_directory}/include)
set(source_directory ${devicertl_base_directory}/src)
set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
- "gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx1010"
+ "gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx950;gfx1010"
"gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035"
"gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
"gfx1151;gfx1152;gfx1153")
>From 1f480f3b1e767fae2f111ce5fc2d6b458ea53c3b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 15 Nov 2024 07:44:06 -0800
Subject: [PATCH 2/3] Reorder targets
---
llvm/tools/llvm-readobj/ELFDumper.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 6360a169cbeda9..bb8ec41d87454c 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1617,9 +1617,9 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX950, "gfx950"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX950, "gfx950"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \
>From fd4cc2895ab91ecb3db83fa8afa00b4d29fdddcb Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 15 Nov 2024 07:44:23 -0800
Subject: [PATCH 3/3] Fix missing diff run line
---
llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
index 475f6f6d8322c7..8d5307372a3030 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
@@ -137,7 +137,6 @@ define amdgpu_kernel void @test_kernel() {
; ----------------------------------GFX9---------------------------------------
;
-
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx9-4-generic -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx9-4-generic %t.o > %t-specify.txt
; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
@@ -146,11 +145,11 @@ define amdgpu_kernel void @test_kernel() {
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx9-generic -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx9-generic %t.o > %t-specify.txt
; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
-;
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx950 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
-;
; RUN: diff %t-specify.txt %t-detect.txt
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=obj -O0 -o %t.o %s
More information about the cfe-commits mailing list