[llvm] bae8b84 - AMDGPU: Add more tests for fp min/max combines (#184336)

via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 12 05:11:35 PDT 2026


Author: Matt Arsenault
Date: 2026-03-12T13:11:31+01:00
New Revision: bae8b84ce9de4b8cd3bda2db125a123e77a9d9f8

URL: https://github.com/llvm/llvm-project/commit/bae8b84ce9de4b8cd3bda2db125a123e77a9d9f8
DIFF: https://github.com/llvm/llvm-project/commit/bae8b84ce9de4b8cd3bda2db125a123e77a9d9f8.diff

LOG: AMDGPU: Add more tests for fp min/max combines (#184336)

There's some overlap with existing tests which
use the nnan flag. The vector cases get missed here.

Added: 
    llvm/test/CodeGen/AMDGPU/select-nsz-known-values-to-fmin-fmax.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/select-nsz-known-values-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/select-nsz-known-values-to-fmin-fmax.ll
new file mode 100644
index 0000000000000..65af7749037e2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/select-nsz-known-values-to-fmin-fmax.ll
@@ -0,0 +1,956 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
+
+define half @v_max_pat_f16_oge(half nofpclass(nan) %a, half nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_max_pat_f16_oge:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_max_pat_f16_oge:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_max_pat_f16_oge:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_max_f16_e32 v0.l, v0.l, v1.l
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_max_pat_f16_oge:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp oge half %a, %b
+  %select = select nsz i1 %cmp, half %a, half %b
+  ret half %select
+}
+
+define half @v_min_pat_f16_olt(half nofpclass(nan) %a, half nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_min_pat_f16_olt:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_min_pat_f16_olt:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_min_pat_f16_olt:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_min_f16_e32 v0.l, v0.l, v1.l
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_min_pat_f16_olt:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_min_num_f16_e32 v0, v0, v1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp olt half %a, %b
+  %select = select nsz i1 %cmp, half %a, half %b
+  ret half %select
+}
+
+define half @v_max_pat_f16_uge(half nofpclass(nan) %a, half nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_max_pat_f16_uge:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_max_pat_f16_uge:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_max_pat_f16_uge:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_max_f16_e32 v0.l, v0.l, v1.l
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_max_pat_f16_uge:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp uge half %a, %b
+  %select = select nsz i1 %cmp, half %a, half %b
+  ret half %select
+}
+
+define half @v_min_pat_f16_ult(half nofpclass(nan) %a, half nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_min_pat_f16_ult:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_min_pat_f16_ult:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_min_pat_f16_ult:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_min_f16_e32 v0.l, v0.l, v1.l
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_min_pat_f16_ult:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_min_num_f16_e32 v0, v0, v1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp ult half %a, %b
+  %select = select nsz i1 %cmp, half %a, half %b
+  ret half %select
+}
+
+define <2 x half> @v_max_pat_v2f16_oge(<2 x half> nofpclass(nan) %a, <2 x half> nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_max_pat_v2f16_oge:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v3
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v2
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v1
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v0
+; GFX7-NEXT:    v_cmp_ge_f32_e32 vcc, v5, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX7-NEXT:    v_cmp_ge_f32_e32 vcc, v7, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_max_pat_v2f16_oge:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_pk_max_f16 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_max_pat_v2f16_oge:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_pk_max_f16 v0, v0, v1
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_max_pat_v2f16_oge:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, v1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp oge <2 x half> %a, %b
+  %select = select nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
+  ret <2 x half> %select
+}
+
+define <2 x half> @v_min_pat_v2f16_olt(<2 x half> nofpclass(nan) %a, <2 x half> nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_min_pat_v2f16_olt:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v3
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v2
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v1
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v0
+; GFX7-NEXT:    v_cmp_lt_f32_e32 vcc, v5, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX7-NEXT:    v_cmp_lt_f32_e32 vcc, v7, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_min_pat_v2f16_olt:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_pk_min_f16 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_min_pat_v2f16_olt:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_pk_min_f16 v0, v0, v1
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_min_pat_v2f16_olt:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_pk_min_num_f16 v0, v0, v1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp olt <2 x half> %a, %b
+  %select = select nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
+  ret <2 x half> %select
+}
+
+define <2 x half> @v_max_pat_v2f16_uge(<2 x half> nofpclass(nan) %a, <2 x half> nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_max_pat_v2f16_uge:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v3
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v2
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v1
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v0
+; GFX7-NEXT:    v_cmp_ge_f32_e32 vcc, v5, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX7-NEXT:    v_cmp_ge_f32_e32 vcc, v7, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_max_pat_v2f16_uge:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_pk_max_f16 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_max_pat_v2f16_uge:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_pk_max_f16 v0, v0, v1
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_max_pat_v2f16_uge:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, v1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp uge <2 x half> %a, %b
+  %select = select nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
+  ret <2 x half> %select
+}
+
+define <2 x half> @v_min_pat_v2f16_ult(<2 x half> nofpclass(nan) %a, <2 x half> nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_min_pat_v2f16_ult:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v3
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v2
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v1
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v0
+; GFX7-NEXT:    v_cmp_lt_f32_e32 vcc, v5, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX7-NEXT:    v_cmp_lt_f32_e32 vcc, v7, v6
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_min_pat_v2f16_ult:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_pk_min_f16 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_min_pat_v2f16_ult:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_pk_min_f16 v0, v0, v1
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_min_pat_v2f16_ult:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_pk_min_num_f16 v0, v0, v1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp ult <2 x half> %a, %b
+  %select = select nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
+  ret <2 x half> %select
+}
+
+define float @v_max_pat_f32_oge(float nofpclass(nan) %a, float nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_max_pat_f32_oge:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_max_pat_f32_oge:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_max_pat_f32_oge:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_max_pat_f32_oge:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp oge float %a, %b
+  %select = select nsz i1 %cmp, float %a, float %b
+  ret float %select
+}
+
+define float @v_min_pat_f32_olt(float nofpclass(nan) %a, float nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_min_pat_f32_olt:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_min_pat_f32_olt:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_min_pat_f32_olt:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_min_pat_f32_olt:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_min_num_f32_e32 v0, v0, v1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp olt float %a, %b
+  %select = select nsz i1 %cmp, float %a, float %b
+  ret float %select
+}
+
+define float @v_max_pat_f32_uge(float nofpclass(nan) %a, float nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_max_pat_f32_uge:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_max_pat_f32_uge:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_max_pat_f32_uge:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_max_pat_f32_uge:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp uge float %a, %b
+  %select = select nsz i1 %cmp, float %a, float %b
+  ret float %select
+}
+
+define float @v_min_pat_f32_ult(float nofpclass(nan) %a, float nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_min_pat_f32_ult:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_min_pat_f32_ult:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_min_pat_f32_ult:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_min_pat_f32_ult:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_min_num_f32_e32 v0, v0, v1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp ult float %a, %b
+   %select = select nsz i1 %cmp, float %a, float %b
+  ret float %select
+}
+
+define <2 x float> @v_max_pat_v2f32_oge(<2 x float> nofpclass(nan) %a, <2 x float> nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_max_pat_v2f32_oge:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_max_legacy_f32_e32 v0, v0, v2
+; GFX7-NEXT:    v_max_legacy_f32_e32 v1, v1, v3
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: v_max_pat_v2f32_oge:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX900-NEXT:    v_cmp_ge_f32_e32 vcc, v1, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_max_pat_v2f32_oge:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT:    v_cmp_ge_f32_e32 vcc, v1, v3
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_max_pat_v2f32_oge:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_ge_f32_e32 vcc_lo, v0, v2
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_ge_f32_e32 vcc_lo, v1, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_max_pat_v2f32_oge:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_cmp_ge_f32_e32 vcc_lo, v0, v2
+; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX12-NEXT:    v_cmp_ge_f32_e32 vcc_lo, v1, v3
+; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp oge <2 x float> %a, %b
+  %select = select nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
+  ret <2 x float> %select
+}
+
+define <2 x float> @v_min_pat_v2f32_olt(<2 x float> nofpclass(nan) %a, <2 x float> nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_min_pat_v2f32_olt:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_min_legacy_f32_e32 v0, v0, v2
+; GFX7-NEXT:    v_min_legacy_f32_e32 v1, v1, v3
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: v_min_pat_v2f32_olt:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX900-NEXT:    v_cmp_lt_f32_e32 vcc, v1, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_min_pat_v2f32_olt:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT:    v_cmp_lt_f32_e32 vcc, v1, v3
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_min_pat_v2f32_olt:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_lt_f32_e32 vcc_lo, v0, v2
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_lt_f32_e32 vcc_lo, v1, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_min_pat_v2f32_olt:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_cmp_lt_f32_e32 vcc_lo, v0, v2
+; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX12-NEXT:    v_cmp_lt_f32_e32 vcc_lo, v1, v3
+; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp olt <2 x float> %a, %b
+  %select = select nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
+  ret <2 x float> %select
+}
+
+define <2 x float> @v_max_pat_v2f32_uge(<2 x float> nofpclass(nan) %a, <2 x float> nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_max_pat_v2f32_uge:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_max_legacy_f32_e32 v0, v0, v2
+; GFX7-NEXT:    v_max_legacy_f32_e32 v1, v1, v3
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: v_max_pat_v2f32_uge:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX900-NEXT:    v_cmp_ge_f32_e32 vcc, v1, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_max_pat_v2f32_uge:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT:    v_cmp_ge_f32_e32 vcc, v1, v3
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_max_pat_v2f32_uge:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_ge_f32_e32 vcc_lo, v0, v2
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_ge_f32_e32 vcc_lo, v1, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_max_pat_v2f32_uge:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_cmp_ge_f32_e32 vcc_lo, v0, v2
+; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX12-NEXT:    v_cmp_ge_f32_e32 vcc_lo, v1, v3
+; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp uge <2 x float> %a, %b
+  %select = select nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
+  ret <2 x float> %select
+}
+
+define <2 x float> @v_min_pat_v2f32_ult(<2 x float> nofpclass(nan) %a, <2 x float> nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_min_pat_v2f32_ult:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_min_legacy_f32_e32 v0, v0, v2
+; GFX7-NEXT:    v_min_legacy_f32_e32 v1, v1, v3
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: v_min_pat_v2f32_ult:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX900-NEXT:    v_cmp_lt_f32_e32 vcc, v1, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_min_pat_v2f32_ult:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT:    v_cmp_lt_f32_e32 vcc, v1, v3
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_min_pat_v2f32_ult:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_lt_f32_e32 vcc_lo, v0, v2
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_lt_f32_e32 vcc_lo, v1, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_min_pat_v2f32_ult:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_cmp_lt_f32_e32 vcc_lo, v0, v2
+; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX12-NEXT:    v_cmp_lt_f32_e32 vcc_lo, v1, v3
+; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp ult <2 x float> %a, %b
+  %select = select nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
+  ret <2 x float> %select
+}
+
+define double @v_max_pat_f64_oge(double nofpclass(nan) %a, double nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_max_pat_f64_oge:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_max_pat_f64_oge:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_max_pat_f64_oge:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_max_pat_f64_oge:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp oge double %a, %b
+  %select = select nsz i1 %cmp, double %a, double %b
+  ret double %select
+}
+
+define double @v_min_pat_f64_olt(double nofpclass(nan) %a, double nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_min_pat_f64_olt:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_min_pat_f64_olt:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_min_pat_f64_olt:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_min_pat_f64_olt:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp olt double %a, %b
+  %select = select nsz i1 %cmp, double %a, double %b
+  ret double %select
+}
+
+define double @v_max_pat_f64_uge(double nofpclass(nan) %a, double nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_max_pat_f64_uge:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_max_pat_f64_uge:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_max_pat_f64_uge:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_max_pat_f64_uge:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp uge double %a, %b
+  %select = select nsz i1 %cmp, double %a, double %b
+  ret double %select
+}
+
+define double @v_min_pat_f64_ult(double nofpclass(nan) %a, double nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_min_pat_f64_ult:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_min_pat_f64_ult:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_min_pat_f64_ult:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_min_pat_f64_ult:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp ult double %a, %b
+  %select = select nsz i1 %cmp, double %a, double %b
+  ret double %select
+}
+
+define <2 x double> @v_max_pat_v2f64_oge(<2 x double> nofpclass(nan) %a, <2 x double> nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_max_pat_v2f64_oge:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_cmp_ge_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT:    v_cmp_ge_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: v_max_pat_v2f64_oge:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_cmp_ge_f64_e32 vcc, v[0:1], v[4:5]
+; GFX900-NEXT:    v_cmp_ge_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_max_pat_v2f64_oge:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_cmp_ge_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT:    v_cmp_ge_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_max_pat_v2f64_oge:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_ge_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT:    v_cmp_ge_f64_e64 s0, v[2:3], v[6:7]
+; GFX11-NEXT:    v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_max_pat_v2f64_oge:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_cmp_ge_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX12-NEXT:    v_cmp_ge_f64_e64 s0, v[2:3], v[6:7]
+; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT:    v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
+; GFX12-NEXT:    s_wait_alu depctr_va_sdst(0)
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s0
+; GFX12-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s0
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp oge <2 x double> %a, %b
+  %select = select nsz <2 x i1> %cmp, <2 x double> %a, <2 x double> %b
+  ret <2 x double> %select
+}
+
+define <2 x double> @v_min_pat_v2f64_olt(<2 x double> nofpclass(nan) %a, <2 x double> nofpclass(nan) %b) #0 {
+; GFX7-LABEL: v_min_pat_v2f64_olt:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: v_min_pat_v2f64_olt:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; GFX900-NEXT:    v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_min_pat_v2f64_olt:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT:    v_cmp_lt_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_min_pat_v2f64_olt:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT:    v_cmp_lt_f64_e64 s0, v[2:3], v[6:7]
+; GFX11-NEXT:    v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_min_pat_v2f64_olt:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX12-NEXT:    v_cmp_lt_f64_e64 s0, v[2:3], v[6:7]
+; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT:    v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
+; GFX12-NEXT:    s_wait_alu depctr_va_sdst(0)
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s0
+; GFX12-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s0
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = fcmp olt <2 x double> %a, %b
+  %select = select nsz <2 x i1> %cmp, <2 x double> %a, <2 x double> %b
+  ret <2 x double> %select
+}
+
+attributes #0 = { nounwind }


        


More information about the llvm-commits mailing list