[llvm] 6cf84e0 - AMDGPU: Test more subtargets in minimumnum/maximumnum tests (#139144)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 8 23:15:13 PDT 2025
Author: Matt Arsenault
Date: 2025-05-09T08:15:09+02:00
New Revision: 6cf84e036e84bc134c05185477a2a3853e2d6140
URL: https://github.com/llvm/llvm-project/commit/6cf84e036e84bc134c05185477a2a3853e2d6140
DIFF: https://github.com/llvm/llvm-project/commit/6cf84e036e84bc134c05185477a2a3853e2d6140.diff
LOG: AMDGPU: Test more subtargets in minimumnum/maximumnum tests (#139144)
Added:
Modified:
llvm/test/CodeGen/AMDGPU/maximumnum.ll
llvm/test/CodeGen/AMDGPU/minimumnum.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/maximumnum.ll b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
index 718a266f49f5d..df79534a0844e 100644
--- a/llvm/test/CodeGen/AMDGPU/maximumnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
@@ -8,6 +10,16 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
define half @v_maximumnum_f16(half %x, half %y) {
+; GFX7-LABEL: v_maximumnum_f16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -80,6 +92,16 @@ define half @v_maximumnum_f16(half %x, half %y) {
}
define half @v_maximumnum_f16_nnan(half %x, half %y) {
+; GFX7-LABEL: v_maximumnum_f16_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -134,6 +156,14 @@ define half @v_maximumnum_f16_nnan(half %x, half %y) {
}
define half @v_maximumnum_f16_1.0(half %x) {
+; GFX7-LABEL: v_maximumnum_f16_1.0:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f16_1.0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -199,6 +229,17 @@ define half @v_maximumnum_f16_1.0(half %x) {
}
define bfloat @v_maximumnum_bf16(bfloat %x, bfloat %y) {
+; GFX7-LABEL: v_maximumnum_bf16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_bf16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -231,36 +272,67 @@ define bfloat @v_maximumnum_bf16(bfloat %x, bfloat %y) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_bf16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v0
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v0
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v1
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-NEXT: v_bfe_u32 v3, v2, 16, 1
-; GFX9-NEXT: s_movk_i32 s4, 0x7fff
-; GFX9-NEXT: v_add3_u32 v3, v3, v2, s4
-; GFX9-NEXT: v_or_b32_e32 v4, 0x400000, v2
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_bf16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v1
+; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, v2, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX900-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-NEXT: v_bfe_u32 v3, v2, 16, 1
+; GFX900-NEXT: s_movk_i32 s4, 0x7fff
+; GFX900-NEXT: v_add3_u32 v3, v3, v2, s4
+; GFX900-NEXT: v_or_b32_e32 v4, 0x400000, v2
+; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX900-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX900-NEXT: v_cmp_eq_u16_e32 vcc, 0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
+; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_bf16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
+; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v1
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
+; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v1
+; GFX950-NEXT: v_cmp_gt_f32_e32 vcc, v2, v3
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX950-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-NEXT: v_cvt_pk_bf16_f32 v2, v2, s0
+; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, 0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v2
+; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_bf16:
; GFX10: ; %bb.0:
@@ -474,6 +546,17 @@ define bfloat @v_maximumnum_bf16(bfloat %x, bfloat %y) {
}
define bfloat @v_maximumnum_bf16_nnan(bfloat %x, bfloat %y) {
+; GFX7-LABEL: v_maximumnum_bf16_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_bf16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -490,21 +573,41 @@ define bfloat @v_maximumnum_bf16_nnan(bfloat %x, bfloat %y) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_bf16_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v1
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v0
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v3, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
-; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v2
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_bf16_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v1
+; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, v3, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX900-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX900-NEXT: v_cmp_eq_u16_e32 vcc, 0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v2
+; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_bf16_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v1
+; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX950-NEXT: v_cmp_gt_f32_e32 vcc, v3, v2
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, 0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v2
+; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_bf16_nnan:
; GFX10: ; %bb.0:
@@ -618,6 +721,14 @@ define bfloat @v_maximumnum_bf16_nnan(bfloat %x, bfloat %y) {
}
define float @v_maximumnum_f32(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -666,6 +777,12 @@ define float @v_maximumnum_f32(float %x, float %y) {
}
define float @v_maximumnum_f32_nnan(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -704,6 +821,14 @@ define float @v_maximumnum_f32_nnan(float %x, float %y) {
}
define double @v_maximumnum_f64(double %x, double %y) {
+; GFX7-LABEL: v_maximumnum_f64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -754,6 +879,12 @@ define double @v_maximumnum_f64(double %x, double %y) {
}
define double @v_maximumnum_f64_nnan(double %x, double %y) {
+; GFX7-LABEL: v_maximumnum_f64_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f64_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -792,6 +923,13 @@ define double @v_maximumnum_f64_nnan(double %x, double %y) {
}
define float @v_maximumnum_f32_1.0(float %x) {
+; GFX7-LABEL: v_maximumnum_f32_1.0:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32_1.0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -837,6 +975,14 @@ define float @v_maximumnum_f32_1.0(float %x) {
}
define float @v_maximumnum_f32_rhs_not_snan(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32_rhs_not_snan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32_rhs_not_snan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -886,6 +1032,14 @@ define float @v_maximumnum_f32_rhs_not_snan(float %x, float %y) {
}
define float @v_maximumnum_f32_lhs_not_snan(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32_lhs_not_snan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32_lhs_not_snan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -935,6 +1089,14 @@ define float @v_maximumnum_f32_lhs_not_snan(float %x, float %y) {
}
define float @v_maximumnum_f32_both_operands_not_snan(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32_both_operands_not_snan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32_both_operands_not_snan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -985,6 +1147,13 @@ define float @v_maximumnum_f32_both_operands_not_snan(float %x, float %y) {
}
define double @v_maximumnum_f64_1.0(double %x) {
+; GFX7-LABEL: v_maximumnum_f64_1.0:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], 1.0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f64_1.0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1030,6 +1199,16 @@ define double @v_maximumnum_f64_1.0(double %x) {
}
define half @v_maximumnum_f16_s_v(half inreg %x, half %y) {
+; GFX7-LABEL: v_maximumnum_f16_s_v:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s16
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f16_s_v:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1038,13 +1217,21 @@ define half @v_maximumnum_f16_s_v(half inreg %x, half %y) {
; GFX8-NEXT: v_max_f16_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f16_s_v:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX9-NEXT: v_max_f16_e32 v0, v1, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f16_s_v:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX900-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f16_s_v:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX950-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16_s_v:
; GFX10: ; %bb.0:
@@ -1102,6 +1289,16 @@ define half @v_maximumnum_f16_s_v(half inreg %x, half %y) {
}
define half @v_maximumnum_f16_v_s(half %x, half inreg %y) {
+; GFX7-LABEL: v_maximumnum_f16_v_s:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s16
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f16_v_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1110,13 +1307,21 @@ define half @v_maximumnum_f16_v_s(half %x, half inreg %y) {
; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f16_v_s:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f16_v_s:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX900-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f16_v_s:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX950-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16_v_s:
; GFX10: ; %bb.0:
@@ -1174,6 +1379,16 @@ define half @v_maximumnum_f16_v_s(half %x, half inreg %y) {
}
define half @v_maximumnum_f16_s_s(half inreg %x, half inreg %y) {
+; GFX7-LABEL: v_maximumnum_f16_s_s:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, s16
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s17
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f16_s_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1182,13 +1397,21 @@ define half @v_maximumnum_f16_s_s(half inreg %x, half inreg %y) {
; GFX8-NEXT: v_max_f16_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f16_s_s:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e64 v0, s17, s17
-; GFX9-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX9-NEXT: v_max_f16_e32 v0, v1, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f16_s_s:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e64 v0, s17, s17
+; GFX900-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX900-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f16_s_s:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e64 v0, s1, s1
+; GFX950-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX950-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16_s_s:
; GFX10: ; %bb.0:
@@ -1246,6 +1469,14 @@ define half @v_maximumnum_f16_s_s(half inreg %x, half inreg %y) {
}
define float @v_maximumnum_f32_s_v(float inreg %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32_s_v:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX7-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32_s_v:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1254,13 +1485,21 @@ define float @v_maximumnum_f32_s_v(float inreg %x, float %y) {
; GFX8-NEXT: v_max_f32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f32_s_v:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e64 v1, s16, s16
-; GFX9-NEXT: v_max_f32_e32 v0, v1, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f32_s_v:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX900-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f32_s_v:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX950-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_s_v:
; GFX10: ; %bb.0:
@@ -1296,6 +1535,14 @@ define float @v_maximumnum_f32_s_v(float inreg %x, float %y) {
}
define float @v_maximumnum_f32_v_s(float %x, float inreg %y) {
+; GFX7-LABEL: v_maximumnum_f32_v_s:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32_v_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1304,13 +1551,21 @@ define float @v_maximumnum_f32_v_s(float %x, float inreg %y) {
; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f32_v_s:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v1, s16, s16
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f32_v_s:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f32_v_s:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_v_s:
; GFX10: ; %bb.0:
@@ -1346,6 +1601,14 @@ define float @v_maximumnum_f32_v_s(float %x, float inreg %y) {
}
define float @v_maximumnum_f32_s_s(float inreg %x, float inreg %y) {
+; GFX7-LABEL: v_maximumnum_f32_s_s:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, s17
+; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX7-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32_s_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1354,13 +1617,21 @@ define float @v_maximumnum_f32_s_s(float inreg %x, float inreg %y) {
; GFX8-NEXT: v_max_f32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f32_s_s:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v0, s17, s17
-; GFX9-NEXT: v_max_f32_e64 v1, s16, s16
-; GFX9-NEXT: v_max_f32_e32 v0, v1, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f32_s_s:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e64 v0, s17, s17
+; GFX900-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX900-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f32_s_s:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e64 v0, s1, s1
+; GFX950-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX950-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_s_s:
; GFX10: ; %bb.0:
@@ -1396,6 +1667,14 @@ define float @v_maximumnum_f32_s_s(float inreg %x, float inreg %y) {
}
define double @v_maximumnum_f64_s_v(double inreg %x, double %y) {
+; GFX7-LABEL: v_maximumnum_f64_s_v:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX7-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f64_s_v:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1404,13 +1683,21 @@ define double @v_maximumnum_f64_s_v(double inreg %x, double %y) {
; GFX8-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f64_s_v:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX9-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f64_s_v:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX900-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f64_s_v:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX950-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f64_s_v:
; GFX10: ; %bb.0:
@@ -1446,6 +1733,14 @@ define double @v_maximumnum_f64_s_v(double inreg %x, double %y) {
}
define double @v_maximumnum_f64_v_s(double %x, double inreg %y) {
+; GFX7-LABEL: v_maximumnum_f64_v_s:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f64_v_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1454,13 +1749,21 @@ define double @v_maximumnum_f64_v_s(double %x, double inreg %y) {
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f64_v_s:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f64_v_s:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f64_v_s:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f64_v_s:
; GFX10: ; %bb.0:
@@ -1496,6 +1799,14 @@ define double @v_maximumnum_f64_v_s(double %x, double inreg %y) {
}
define double @v_maximumnum_f64_s_s(double inreg %x, double inreg %y) {
+; GFX7-LABEL: v_maximumnum_f64_s_s:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
+; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX7-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f64_s_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1504,13 +1815,21 @@ define double @v_maximumnum_f64_s_s(double inreg %x, double inreg %y) {
; GFX8-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f64_s_s:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
-; GFX9-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX9-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f64_s_s:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
+; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX900-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f64_s_s:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
+; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX950-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f64_s_s:
; GFX10: ; %bb.0:
@@ -1546,6 +1865,14 @@ define double @v_maximumnum_f64_s_s(double inreg %x, double inreg %y) {
}
define float @v_maximumnum_f32_fabs_rhs(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32_fabs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1597,6 +1924,14 @@ define float @v_maximumnum_f32_fabs_rhs(float %x, float %y) {
}
define float @v_maximumnum_f32_fneg_fabs_rhs(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e64 v1, -1.0, |v1|
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1649,6 +1984,14 @@ define float @v_maximumnum_f32_fneg_fabs_rhs(float %x, float %y) {
}
define float @v_maximumnum_f32_fabs(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32_fabs:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32_fabs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1701,6 +2044,14 @@ define float @v_maximumnum_f32_fabs(float %x, float %y) {
}
define float @v_maximumnum_f32_fneg(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32_fneg:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v1, -1.0, v1
+; GFX7-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32_fneg:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1753,6 +2104,16 @@ define float @v_maximumnum_f32_fneg(float %x, float %y) {
}
define half @v_maximumnum_f16_fabs_rhs(half %x, half %y) {
+; GFX7-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e64 v1, |v1|
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f16_fabs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1826,6 +2187,16 @@ define half @v_maximumnum_f16_fabs_rhs(half %x, half %y) {
}
define half @v_maximumnum_f16_fneg_fabs_rhs(half %x, half %y) {
+; GFX7-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e64 v1, -|v1|
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1900,6 +2271,16 @@ define half @v_maximumnum_f16_fneg_fabs_rhs(half %x, half %y) {
}
define half @v_maximumnum_f16_fabs(half %x, half %y) {
+; GFX7-LABEL: v_maximumnum_f16_fabs:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e64 v0, |v0|
+; GFX7-NEXT: v_cvt_f32_f16_e64 v1, |v1|
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f16_fabs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1974,6 +2355,16 @@ define half @v_maximumnum_f16_fabs(half %x, half %y) {
}
define half @v_maximumnum_f16_fneg(half %x, half %y) {
+; GFX7-LABEL: v_maximumnum_f16_fneg:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -v0
+; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f16_fneg:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2048,6 +2439,14 @@ define half @v_maximumnum_f16_fneg(half %x, half %y) {
}
define double @v_maximumnum_f64_fneg(double %x, double %y) {
+; GFX7-LABEL: v_maximumnum_f64_fneg:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX7-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f64_fneg:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2100,6 +2499,21 @@ define double @v_maximumnum_f64_fneg(double %x, double %y) {
}
define <2 x half> @v_maximumnum_v2f16(<2 x half> %x, <2 x half> %y) {
+; GFX7-LABEL: v_maximumnum_v2f16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v2f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2112,13 +2526,22 @@ define <2 x half> @v_maximumnum_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v2f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v2f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v2f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v2f16:
; GFX10: ; %bb.0:
@@ -2154,6 +2577,21 @@ define <2 x half> @v_maximumnum_v2f16(<2 x half> %x, <2 x half> %y) {
}
define <2 x half> @v_maximumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) {
+; GFX7-LABEL: v_maximumnum_v2f16_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v2f16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2194,6 +2632,26 @@ define <2 x half> @v_maximumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) {
}
define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) {
+; GFX7-LABEL: v_maximumnum_v3f16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v3f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2209,16 +2667,28 @@ define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) {
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v3f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX9-NEXT: v_pk_max_f16 v2, v3, v3
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v3f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX900-NEXT: v_pk_max_f16 v2, v3, v3
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v2
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v3f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX950-NEXT: v_pk_max_f16 v2, v3, v3
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_pk_max_f16 v1, v1, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v3f16:
; GFX10: ; %bb.0:
@@ -2263,6 +2733,26 @@ define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) {
}
define <3 x half> @v_maximumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
+; GFX7-LABEL: v_maximumnum_v3f16_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v3f16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2308,6 +2798,31 @@ define <3 x half> @v_maximumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
}
define <4 x half> @v_maximumnum_v4f16(<4 x half> %x, <4 x half> %y) {
+; GFX7-LABEL: v_maximumnum_v4f16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v4f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2327,16 +2842,28 @@ define <4 x half> @v_maximumnum_v4f16(<4 x half> %x, <4 x half> %y) {
; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v4f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX9-NEXT: v_pk_max_f16 v2, v3, v3
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v4f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX900-NEXT: v_pk_max_f16 v2, v3, v3
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v2
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v4f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX950-NEXT: v_pk_max_f16 v2, v3, v3
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_pk_max_f16 v1, v1, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v4f16:
; GFX10: ; %bb.0:
@@ -2381,6 +2908,31 @@ define <4 x half> @v_maximumnum_v4f16(<4 x half> %x, <4 x half> %y) {
}
define <4 x half> @v_maximumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) {
+; GFX7-LABEL: v_maximumnum_v4f16_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v4f16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2428,6 +2980,41 @@ define <4 x half> @v_maximumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) {
}
define <6 x half> @v_maximumnum_v6f16(<6 x half> %x, <6 x half> %y) {
+; GFX7-LABEL: v_maximumnum_v6f16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
+; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v6
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v7
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v8
+; GFX7-NEXT: v_max_f32_e32 v3, v3, v9
+; GFX7-NEXT: v_max_f32_e32 v4, v4, v10
+; GFX7-NEXT: v_max_f32_e32 v5, v5, v11
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v6f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2454,19 +3041,34 @@ define <6 x half> @v_maximumnum_v6f16(<6 x half> %x, <6 x half> %y) {
; GFX8-NEXT: v_or_b32_e32 v2, v2, v6
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v6f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v3
-; GFX9-NEXT: v_pk_max_f16 v3, v4, v4
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX9-NEXT: v_pk_max_f16 v3, v5, v5
-; GFX9-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX9-NEXT: v_pk_max_f16 v2, v2, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v6f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v3
+; GFX900-NEXT: v_pk_max_f16 v3, v4, v4
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX900-NEXT: v_pk_max_f16 v3, v5, v5
+; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-NEXT: v_pk_max_f16 v2, v2, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v6f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v3
+; GFX950-NEXT: v_pk_max_f16 v3, v4, v4
+; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX950-NEXT: v_pk_max_f16 v3, v5, v5
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_pk_max_f16 v2, v2, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v6f16:
; GFX10: ; %bb.0:
@@ -2520,6 +3122,51 @@ define <6 x half> @v_maximumnum_v6f16(<6 x half> %x, <6 x half> %y) {
}
define <8 x half> @v_maximumnum_v8f16(<8 x half> %x, <8 x half> %y) {
+; GFX7-LABEL: v_maximumnum_v8f16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15
+; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
+; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
+; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
+; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15
+; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v8
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v9
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v10
+; GFX7-NEXT: v_max_f32_e32 v3, v3, v11
+; GFX7-NEXT: v_max_f32_e32 v4, v4, v12
+; GFX7-NEXT: v_max_f32_e32 v5, v5, v13
+; GFX7-NEXT: v_max_f32_e32 v6, v6, v14
+; GFX7-NEXT: v_max_f32_e32 v7, v7, v15
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v8f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2553,22 +3200,40 @@ define <8 x half> @v_maximumnum_v8f16(<8 x half> %x, <8 x half> %y) {
; GFX8-NEXT: v_or_b32_e32 v3, v3, v8
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v8f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v4
-; GFX9-NEXT: v_pk_max_f16 v4, v5, v5
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v4
-; GFX9-NEXT: v_pk_max_f16 v4, v6, v6
-; GFX9-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX9-NEXT: v_pk_max_f16 v2, v2, v4
-; GFX9-NEXT: v_pk_max_f16 v4, v7, v7
-; GFX9-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX9-NEXT: v_pk_max_f16 v3, v3, v4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v8f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v4
+; GFX900-NEXT: v_pk_max_f16 v4, v5, v5
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v4
+; GFX900-NEXT: v_pk_max_f16 v4, v6, v6
+; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-NEXT: v_pk_max_f16 v2, v2, v4
+; GFX900-NEXT: v_pk_max_f16 v4, v7, v7
+; GFX900-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX900-NEXT: v_pk_max_f16 v3, v3, v4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v8f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v4
+; GFX950-NEXT: v_pk_max_f16 v4, v5, v5
+; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-NEXT: v_pk_max_f16 v1, v1, v4
+; GFX950-NEXT: v_pk_max_f16 v4, v6, v6
+; GFX950-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX950-NEXT: v_pk_max_f16 v2, v2, v4
+; GFX950-NEXT: v_pk_max_f16 v4, v7, v7
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_pk_max_f16 v3, v3, v4
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v8f16:
; GFX10: ; %bb.0:
@@ -2631,6 +3296,17 @@ define <8 x half> @v_maximumnum_v8f16(<8 x half> %x, <8 x half> %y) {
}
define <2 x float> @v_maximumnum_v2f32(<2 x float> %x, <2 x float> %y) {
+; GFX7-LABEL: v_maximumnum_v2f32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v3
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v2
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v2f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2690,6 +3366,13 @@ define <2 x float> @v_maximumnum_v2f32(<2 x float> %x, <2 x float> %y) {
}
define <2 x float> @v_maximumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) {
+; GFX7-LABEL: v_maximumnum_v2f32_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v2f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2731,6 +3414,20 @@ define <2 x float> @v_maximumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) {
}
define <3 x float> @v_maximumnum_v3f32(<3 x float> %x, <3 x float> %y) {
+; GFX7-LABEL: v_maximumnum_v3f32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v4
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v5
+; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v3
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v3f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2803,6 +3500,14 @@ define <3 x float> @v_maximumnum_v3f32(<3 x float> %x, <3 x float> %y) {
}
define <3 x float> @v_maximumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) {
+; GFX7-LABEL: v_maximumnum_v3f32_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v3f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2849,6 +3554,23 @@ define <3 x float> @v_maximumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) {
}
define <4 x float> @v_maximumnum_v4f32(<4 x float> %x, <4 x float> %y) {
+; GFX7-LABEL: v_maximumnum_v4f32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v5
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v6
+; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v7
+; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX7-NEXT: v_max_f32_e32 v3, v3, v4
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v4f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2932,6 +3654,15 @@ define <4 x float> @v_maximumnum_v4f32(<4 x float> %x, <4 x float> %y) {
}
define <4 x float> @v_maximumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) {
+; GFX7-LABEL: v_maximumnum_v4f32_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v4f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2981,6 +3712,17 @@ define <4 x float> @v_maximumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) {
}
define <2 x double> @v_maximumnum_v2f64(<2 x double> %x, <2 x double> %y) {
+; GFX7-LABEL: v_maximumnum_v2f64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v2f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2992,16 +3734,27 @@ define <2 x double> @v_maximumnum_v2f64(<2 x double> %x, <2 x double> %y) {
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v2f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v2f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v2f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7]
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v2f64:
; GFX10: ; %bb.0:
@@ -3046,6 +3799,13 @@ define <2 x double> @v_maximumnum_v2f64(<2 x double> %x, <2 x double> %y) {
}
define <2 x double> @v_maximumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) {
+; GFX7-LABEL: v_maximumnum_v2f64_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v2f64_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3089,6 +3849,20 @@ define <2 x double> @v_maximumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) {
}
define <3 x double> @v_maximumnum_v3f64(<3 x double> %x, <3 x double> %y) {
+; GFX7-LABEL: v_maximumnum_v3f64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v3f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3103,19 +3877,33 @@ define <3 x double> @v_maximumnum_v3f64(<3 x double> %x, <3 x double> %y) {
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v3f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v3f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v3f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9]
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-NEXT: v_max_f64 v[6:7], v[10:11], v[10:11]
+; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[6:7]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v3f64:
; GFX10: ; %bb.0:
@@ -3169,6 +3957,14 @@ define <3 x double> @v_maximumnum_v3f64(<3 x double> %x, <3 x double> %y) {
}
define <3 x double> @v_maximumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) {
+; GFX7-LABEL: v_maximumnum_v3f64_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v3f64_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3217,6 +4013,23 @@ define <3 x double> @v_maximumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) {
}
define <4 x double> @v_maximumnum_v4f64(<4 x double> %x, <4 x double> %y) {
+; GFX7-LABEL: v_maximumnum_v4f64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX7-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v4f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3234,22 +4047,39 @@ define <4 x double> @v_maximumnum_v4f64(<4 x double> %x, <4 x double> %y) {
; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v4f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX9-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
-; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
-; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v4f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX900-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v4f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11]
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13]
+; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[8:9]
+; GFX950-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15]
+; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[8:9]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v4f64:
; GFX10: ; %bb.0:
@@ -3312,6 +4142,15 @@ define <4 x double> @v_maximumnum_v4f64(<4 x double> %x, <4 x double> %y) {
}
define <4 x double> @v_maximumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) {
+; GFX7-LABEL: v_maximumnum_v4f64_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_v4f64_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/minimumnum.ll b/llvm/test/CodeGen/AMDGPU/minimumnum.ll
index 2b4d687bb0c29..03e5777d5b04c 100644
--- a/llvm/test/CodeGen/AMDGPU/minimumnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/minimumnum.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
@@ -8,6 +10,16 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
define half @v_minimumnum_f16(half %x, half %y) {
+; GFX7-LABEL: v_minimumnum_f16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -80,6 +92,16 @@ define half @v_minimumnum_f16(half %x, half %y) {
}
define half @v_minimumnum_f16_nnan(half %x, half %y) {
+; GFX7-LABEL: v_minimumnum_f16_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -134,6 +156,14 @@ define half @v_minimumnum_f16_nnan(half %x, half %y) {
}
define half @v_minimumnum_f16_1.0(half %x) {
+; GFX7-LABEL: v_minimumnum_f16_1.0:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_min_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f16_1.0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -199,6 +229,17 @@ define half @v_minimumnum_f16_1.0(half %x) {
}
define bfloat @v_minimumnum_bf16(bfloat %x, bfloat %y) {
+; GFX7-LABEL: v_minimumnum_bf16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_bf16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -232,37 +273,69 @@ define bfloat @v_minimumnum_bf16(bfloat %x, bfloat %y) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_bf16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v0
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v0
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v1
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-NEXT: v_bfe_u32 v3, v2, 16, 1
-; GFX9-NEXT: s_movk_i32 s4, 0x7fff
-; GFX9-NEXT: v_add3_u32 v3, v3, v2, s4
-; GFX9-NEXT: v_or_b32_e32 v4, 0x400000, v2
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc
-; GFX9-NEXT: s_movk_i32 s4, 0x8000
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s4, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_bf16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v1
+; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v2, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX900-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-NEXT: v_bfe_u32 v3, v2, 16, 1
+; GFX900-NEXT: s_movk_i32 s4, 0x7fff
+; GFX900-NEXT: v_add3_u32 v3, v3, v2, s4
+; GFX900-NEXT: v_or_b32_e32 v4, 0x400000, v2
+; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc
+; GFX900-NEXT: s_movk_i32 s4, 0x8000
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX900-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX900-NEXT: v_cmp_eq_u16_e32 vcc, s4, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
+; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_bf16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
+; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v1
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
+; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v1
+; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v2, v3
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX950-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-NEXT: v_cvt_pk_bf16_f32 v2, v2, s0
+; GFX950-NEXT: s_movk_i32 s0, 0x8000
+; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, s0, v0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, s0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v2
+; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_bf16:
; GFX10: ; %bb.0:
@@ -476,6 +549,17 @@ define bfloat @v_minimumnum_bf16(bfloat %x, bfloat %y) {
}
define bfloat @v_minimumnum_bf16_nnan(bfloat %x, bfloat %y) {
+; GFX7-LABEL: v_minimumnum_bf16_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_bf16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -493,22 +577,43 @@ define bfloat @v_minimumnum_bf16_nnan(bfloat %x, bfloat %y) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_bf16_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v1
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v0
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v3, v2
-; GFX9-NEXT: s_movk_i32 s4, 0x8000
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
-; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s4, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v2
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_bf16_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v1
+; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v3, v2
+; GFX900-NEXT: s_movk_i32 s4, 0x8000
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX900-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX900-NEXT: v_cmp_eq_u16_e32 vcc, s4, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v2
+; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_bf16_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v1
+; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v3, v2
+; GFX950-NEXT: s_movk_i32 s0, 0x8000
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, s0, v0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, s0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v2
+; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_bf16_nnan:
; GFX10: ; %bb.0:
@@ -622,6 +727,14 @@ define bfloat @v_minimumnum_bf16_nnan(bfloat %x, bfloat %y) {
}
define float @v_minimumnum_f32(float %x, float %y) {
+; GFX7-LABEL: v_minimumnum_f32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -670,6 +783,12 @@ define float @v_minimumnum_f32(float %x, float %y) {
}
define float @v_minimumnum_f32_nnan(float %x, float %y) {
+; GFX7-LABEL: v_minimumnum_f32_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -708,6 +827,14 @@ define float @v_minimumnum_f32_nnan(float %x, float %y) {
}
define double @v_minimumnum_f64(double %x, double %y) {
+; GFX7-LABEL: v_minimumnum_f64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -758,6 +885,12 @@ define double @v_minimumnum_f64(double %x, double %y) {
}
define double @v_minimumnum_f64_nnan(double %x, double %y) {
+; GFX7-LABEL: v_minimumnum_f64_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f64_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -796,6 +929,13 @@ define double @v_minimumnum_f64_nnan(double %x, double %y) {
}
define float @v_minimumnum_f32_1.0(float %x) {
+; GFX7-LABEL: v_minimumnum_f32_1.0:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_min_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f32_1.0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -841,6 +981,14 @@ define float @v_minimumnum_f32_1.0(float %x) {
}
define float @v_minimumnum_f32_rhs_not_snan(float %x, float %y) {
+; GFX7-LABEL: v_minimumnum_f32_rhs_not_snan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f32_rhs_not_snan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -890,6 +1038,14 @@ define float @v_minimumnum_f32_rhs_not_snan(float %x, float %y) {
}
define float @v_minimumnum_f32_lhs_not_snan(float %x, float %y) {
+; GFX7-LABEL: v_minimumnum_f32_lhs_not_snan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f32_lhs_not_snan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -939,6 +1095,14 @@ define float @v_minimumnum_f32_lhs_not_snan(float %x, float %y) {
}
define float @v_minimumnum_f32_both_operands_not_snan(float %x, float %y) {
+; GFX7-LABEL: v_minimumnum_f32_both_operands_not_snan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f32_both_operands_not_snan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -989,6 +1153,13 @@ define float @v_minimumnum_f32_both_operands_not_snan(float %x, float %y) {
}
define double @v_minimumnum_f64_1.0(double %x) {
+; GFX7-LABEL: v_minimumnum_f64_1.0:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], 1.0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f64_1.0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1034,6 +1205,16 @@ define double @v_minimumnum_f64_1.0(double %x) {
}
define half @v_minimumnum_f16_v_s(half %x, half inreg %y) {
+; GFX7-LABEL: v_minimumnum_f16_v_s:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s16
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f16_v_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1042,13 +1223,21 @@ define half @v_minimumnum_f16_v_s(half %x, half inreg %y) {
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f16_v_s:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f16_v_s:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX900-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f16_v_s:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX950-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_v_s:
; GFX10: ; %bb.0:
@@ -1106,6 +1295,16 @@ define half @v_minimumnum_f16_v_s(half %x, half inreg %y) {
}
define half @v_minimumnum_f16_s_s(half inreg %x, half inreg %y) {
+; GFX7-LABEL: v_minimumnum_f16_s_s:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, s16
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s17
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f16_s_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1114,13 +1313,21 @@ define half @v_minimumnum_f16_s_s(half inreg %x, half inreg %y) {
; GFX8-NEXT: v_min_f16_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f16_s_s:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e64 v0, s17, s17
-; GFX9-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX9-NEXT: v_min_f16_e32 v0, v1, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f16_s_s:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e64 v0, s17, s17
+; GFX900-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX900-NEXT: v_min_f16_e32 v0, v1, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f16_s_s:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e64 v0, s1, s1
+; GFX950-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX950-NEXT: v_min_f16_e32 v0, v1, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_s_s:
; GFX10: ; %bb.0:
@@ -1178,6 +1385,14 @@ define half @v_minimumnum_f16_s_s(half inreg %x, half inreg %y) {
}
define float @v_minimumnum_f32_s_v(float inreg %x, float %y) {
+; GFX7-LABEL: v_minimumnum_f32_s_v:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX7-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f32_s_v:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1186,13 +1401,21 @@ define float @v_minimumnum_f32_s_v(float inreg %x, float %y) {
; GFX8-NEXT: v_min_f32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f32_s_v:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e64 v1, s16, s16
-; GFX9-NEXT: v_min_f32_e32 v0, v1, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f32_s_v:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX900-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f32_s_v:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX950-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_s_v:
; GFX10: ; %bb.0:
@@ -1228,6 +1451,14 @@ define float @v_minimumnum_f32_s_v(float inreg %x, float %y) {
}
define float @v_minimumnum_f32_v_s(float %x, float inreg %y) {
+; GFX7-LABEL: v_minimumnum_f32_v_s:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f32_v_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1236,13 +1467,21 @@ define float @v_minimumnum_f32_v_s(float %x, float inreg %y) {
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f32_v_s:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v1, s16, s16
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f32_v_s:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f32_v_s:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_v_s:
; GFX10: ; %bb.0:
@@ -1278,6 +1517,14 @@ define float @v_minimumnum_f32_v_s(float %x, float inreg %y) {
}
define float @v_minimumnum_f32_s_s(float inreg %x, float inreg %y) {
+; GFX7-LABEL: v_minimumnum_f32_s_s:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, s17
+; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16
+; GFX7-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f32_s_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1286,13 +1533,21 @@ define float @v_minimumnum_f32_s_s(float inreg %x, float inreg %y) {
; GFX8-NEXT: v_min_f32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f32_s_s:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v0, s17, s17
-; GFX9-NEXT: v_max_f32_e64 v1, s16, s16
-; GFX9-NEXT: v_min_f32_e32 v0, v1, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f32_s_s:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e64 v0, s17, s17
+; GFX900-NEXT: v_max_f32_e64 v1, s16, s16
+; GFX900-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f32_s_s:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e64 v0, s1, s1
+; GFX950-NEXT: v_max_f32_e64 v1, s0, s0
+; GFX950-NEXT: v_min_f32_e32 v0, v1, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_s_s:
; GFX10: ; %bb.0:
@@ -1328,6 +1583,14 @@ define float @v_minimumnum_f32_s_s(float inreg %x, float inreg %y) {
}
define double @v_minimumnum_f64_s_v(double inreg %x, double %y) {
+; GFX7-LABEL: v_minimumnum_f64_s_v:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX7-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f64_s_v:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1336,13 +1599,21 @@ define double @v_minimumnum_f64_s_v(double inreg %x, double %y) {
; GFX8-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f64_s_v:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX9-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f64_s_v:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX900-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f64_s_v:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX950-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_s_v:
; GFX10: ; %bb.0:
@@ -1378,6 +1649,14 @@ define double @v_minimumnum_f64_s_v(double inreg %x, double %y) {
}
define double @v_minimumnum_f64_v_s(double %x, double inreg %y) {
+; GFX7-LABEL: v_minimumnum_f64_v_s:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f64_v_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1386,13 +1665,21 @@ define double @v_minimumnum_f64_v_s(double %x, double inreg %y) {
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f64_v_s:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f64_v_s:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f64_v_s:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_v_s:
; GFX10: ; %bb.0:
@@ -1428,6 +1715,14 @@ define double @v_minimumnum_f64_v_s(double %x, double inreg %y) {
}
define double @v_minimumnum_f64_s_s(double inreg %x, double inreg %y) {
+; GFX7-LABEL: v_minimumnum_f64_s_s:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
+; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX7-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f64_s_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1436,13 +1731,21 @@ define double @v_minimumnum_f64_s_s(double inreg %x, double inreg %y) {
; GFX8-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f64_s_s:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
-; GFX9-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
-; GFX9-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f64_s_s:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19]
+; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17]
+; GFX900-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f64_s_s:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
+; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1]
+; GFX950-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_s_s:
; GFX10: ; %bb.0:
@@ -1478,6 +1781,14 @@ define double @v_minimumnum_f64_s_s(double inreg %x, double inreg %y) {
}
define float @v_minimumnum_f32_fabs_rhs(float %x, float %y) {
+; GFX7-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f32_fabs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1529,6 +1840,14 @@ define float @v_minimumnum_f32_fabs_rhs(float %x, float %y) {
}
define float @v_minimumnum_f32_fneg_fabs_rhs(float %x, float %y) {
+; GFX7-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e64 v1, -1.0, |v1|
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1581,6 +1900,14 @@ define float @v_minimumnum_f32_fneg_fabs_rhs(float %x, float %y) {
}
define float @v_minimumnum_f32_fabs(float %x, float %y) {
+; GFX7-LABEL: v_minimumnum_f32_fabs:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
+; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f32_fabs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1633,6 +1960,14 @@ define float @v_minimumnum_f32_fabs(float %x, float %y) {
}
define float @v_minimumnum_f32_fneg(float %x, float %y) {
+; GFX7-LABEL: v_minimumnum_f32_fneg:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v1, -1.0, v1
+; GFX7-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f32_fneg:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1685,6 +2020,16 @@ define float @v_minimumnum_f32_fneg(float %x, float %y) {
}
define half @v_minimumnum_f16_fabs_rhs(half %x, half %y) {
+; GFX7-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e64 v1, |v1|
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f16_fabs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1758,6 +2103,16 @@ define half @v_minimumnum_f16_fabs_rhs(half %x, half %y) {
}
define half @v_minimumnum_f16_fneg_fabs_rhs(half %x, half %y) {
+; GFX7-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e64 v1, -|v1|
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1832,6 +2187,16 @@ define half @v_minimumnum_f16_fneg_fabs_rhs(half %x, half %y) {
}
define half @v_minimumnum_f16_fabs(half %x, half %y) {
+; GFX7-LABEL: v_minimumnum_f16_fabs:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e64 v0, |v0|
+; GFX7-NEXT: v_cvt_f32_f16_e64 v1, |v1|
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f16_fabs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1906,6 +2271,16 @@ define half @v_minimumnum_f16_fabs(half %x, half %y) {
}
define half @v_minimumnum_f16_fneg(half %x, half %y) {
+; GFX7-LABEL: v_minimumnum_f16_fneg:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -v0
+; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f16_fneg:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1980,6 +2355,14 @@ define half @v_minimumnum_f16_fneg(half %x, half %y) {
}
define double @v_minimumnum_f64_fneg(double %x, double %y) {
+; GFX7-LABEL: v_minimumnum_f64_fneg:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX7-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_f64_fneg:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2032,6 +2415,21 @@ define double @v_minimumnum_f64_fneg(double %x, double %y) {
}
define <2 x half> @v_minimumnum_v2f16(<2 x half> %x, <2 x half> %y) {
+; GFX7-LABEL: v_minimumnum_v2f16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v2f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2044,13 +2442,22 @@ define <2 x half> @v_minimumnum_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v2f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v2f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v2f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f16:
; GFX10: ; %bb.0:
@@ -2086,6 +2493,21 @@ define <2 x half> @v_minimumnum_v2f16(<2 x half> %x, <2 x half> %y) {
}
define <2 x half> @v_minimumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) {
+; GFX7-LABEL: v_minimumnum_v2f16_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v2f16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2126,6 +2548,26 @@ define <2 x half> @v_minimumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) {
}
define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) {
+; GFX7-LABEL: v_minimumnum_v3f16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v3f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2141,16 +2583,28 @@ define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) {
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v3f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX9-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX9-NEXT: v_pk_max_f16 v2, v3, v3
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX9-NEXT: v_pk_min_f16 v1, v1, v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v3f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX900-NEXT: v_pk_max_f16 v2, v3, v3
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-NEXT: v_pk_min_f16 v1, v1, v2
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v3f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX950-NEXT: v_pk_max_f16 v2, v3, v3
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_pk_min_f16 v1, v1, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f16:
; GFX10: ; %bb.0:
@@ -2195,6 +2649,26 @@ define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) {
}
define <3 x half> @v_minimumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
+; GFX7-LABEL: v_minimumnum_v3f16_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v3f16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2240,6 +2714,31 @@ define <3 x half> @v_minimumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
}
define <4 x half> @v_minimumnum_v4f16(<4 x half> %x, <4 x half> %y) {
+; GFX7-LABEL: v_minimumnum_v4f16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v4f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2259,16 +2758,28 @@ define <4 x half> @v_minimumnum_v4f16(<4 x half> %x, <4 x half> %y) {
; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v4f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX9-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX9-NEXT: v_pk_max_f16 v2, v3, v3
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX9-NEXT: v_pk_min_f16 v1, v1, v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v4f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX900-NEXT: v_pk_max_f16 v2, v3, v3
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-NEXT: v_pk_min_f16 v1, v1, v2
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v4f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX950-NEXT: v_pk_max_f16 v2, v3, v3
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_pk_min_f16 v1, v1, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f16:
; GFX10: ; %bb.0:
@@ -2313,6 +2824,31 @@ define <4 x half> @v_minimumnum_v4f16(<4 x half> %x, <4 x half> %y) {
}
define <4 x half> @v_minimumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) {
+; GFX7-LABEL: v_minimumnum_v4f16_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v4f16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2360,6 +2896,41 @@ define <4 x half> @v_minimumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) {
}
define <6 x half> @v_minimumnum_v6f16(<6 x half> %x, <6 x half> %y) {
+; GFX7-LABEL: v_minimumnum_v6f16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
+; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v6
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v7
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v8
+; GFX7-NEXT: v_min_f32_e32 v3, v3, v9
+; GFX7-NEXT: v_min_f32_e32 v4, v4, v10
+; GFX7-NEXT: v_min_f32_e32 v5, v5, v11
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v6f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2386,19 +2957,34 @@ define <6 x half> @v_minimumnum_v6f16(<6 x half> %x, <6 x half> %y) {
; GFX8-NEXT: v_or_b32_e32 v2, v2, v6
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v6f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX9-NEXT: v_pk_min_f16 v0, v0, v3
-; GFX9-NEXT: v_pk_max_f16 v3, v4, v4
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX9-NEXT: v_pk_max_f16 v3, v5, v5
-; GFX9-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX9-NEXT: v_pk_min_f16 v2, v2, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v6f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-NEXT: v_pk_min_f16 v0, v0, v3
+; GFX900-NEXT: v_pk_max_f16 v3, v4, v4
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX900-NEXT: v_pk_max_f16 v3, v5, v5
+; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-NEXT: v_pk_min_f16 v2, v2, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v6f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-NEXT: v_pk_min_f16 v0, v0, v3
+; GFX950-NEXT: v_pk_max_f16 v3, v4, v4
+; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX950-NEXT: v_pk_max_f16 v3, v5, v5
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_pk_min_f16 v2, v2, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v6f16:
; GFX10: ; %bb.0:
@@ -2452,6 +3038,51 @@ define <6 x half> @v_minimumnum_v6f16(<6 x half> %x, <6 x half> %y) {
}
define <8 x half> @v_minimumnum_v8f16(<8 x half> %x, <8 x half> %y) {
+; GFX7-LABEL: v_minimumnum_v8f16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15
+; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11
+; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8
+; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9
+; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15
+; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v8
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v9
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v10
+; GFX7-NEXT: v_min_f32_e32 v3, v3, v11
+; GFX7-NEXT: v_min_f32_e32 v4, v4, v12
+; GFX7-NEXT: v_min_f32_e32 v5, v5, v13
+; GFX7-NEXT: v_min_f32_e32 v6, v6, v14
+; GFX7-NEXT: v_min_f32_e32 v7, v7, v15
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v8f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2485,22 +3116,40 @@ define <8 x half> @v_minimumnum_v8f16(<8 x half> %x, <8 x half> %y) {
; GFX8-NEXT: v_or_b32_e32 v3, v3, v8
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v8f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v4, v4, v4
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX9-NEXT: v_pk_min_f16 v0, v0, v4
-; GFX9-NEXT: v_pk_max_f16 v4, v5, v5
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX9-NEXT: v_pk_min_f16 v1, v1, v4
-; GFX9-NEXT: v_pk_max_f16 v4, v6, v6
-; GFX9-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX9-NEXT: v_pk_min_f16 v2, v2, v4
-; GFX9-NEXT: v_pk_max_f16 v4, v7, v7
-; GFX9-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX9-NEXT: v_pk_min_f16 v3, v3, v4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v8f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-NEXT: v_pk_min_f16 v0, v0, v4
+; GFX900-NEXT: v_pk_max_f16 v4, v5, v5
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-NEXT: v_pk_min_f16 v1, v1, v4
+; GFX900-NEXT: v_pk_max_f16 v4, v6, v6
+; GFX900-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-NEXT: v_pk_min_f16 v2, v2, v4
+; GFX900-NEXT: v_pk_max_f16 v4, v7, v7
+; GFX900-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX900-NEXT: v_pk_min_f16 v3, v3, v4
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v8f16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v4, v4, v4
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-NEXT: v_pk_min_f16 v0, v0, v4
+; GFX950-NEXT: v_pk_max_f16 v4, v5, v5
+; GFX950-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-NEXT: v_pk_min_f16 v1, v1, v4
+; GFX950-NEXT: v_pk_max_f16 v4, v6, v6
+; GFX950-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX950-NEXT: v_pk_min_f16 v2, v2, v4
+; GFX950-NEXT: v_pk_max_f16 v4, v7, v7
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_pk_min_f16 v3, v3, v4
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v8f16:
; GFX10: ; %bb.0:
@@ -2563,6 +3212,17 @@ define <8 x half> @v_minimumnum_v8f16(<8 x half> %x, <8 x half> %y) {
}
define <2 x float> @v_minimumnum_v2f32(<2 x float> %x, <2 x float> %y) {
+; GFX7-LABEL: v_minimumnum_v2f32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v3
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v2
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v2f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2622,6 +3282,13 @@ define <2 x float> @v_minimumnum_v2f32(<2 x float> %x, <2 x float> %y) {
}
define <2 x float> @v_minimumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) {
+; GFX7-LABEL: v_minimumnum_v2f32_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v2f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2663,6 +3330,20 @@ define <2 x float> @v_minimumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) {
}
define <3 x float> @v_minimumnum_v3f32(<3 x float> %x, <3 x float> %y) {
+; GFX7-LABEL: v_minimumnum_v3f32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v4
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v5
+; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v3
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v3f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2735,6 +3416,14 @@ define <3 x float> @v_minimumnum_v3f32(<3 x float> %x, <3 x float> %y) {
}
define <3 x float> @v_minimumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) {
+; GFX7-LABEL: v_minimumnum_v3f32_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v3f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2781,6 +3470,23 @@ define <3 x float> @v_minimumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) {
}
define <4 x float> @v_minimumnum_v4f32(<4 x float> %x, <4 x float> %y) {
+; GFX7-LABEL: v_minimumnum_v4f32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v5
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v6
+; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v7
+; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
+; GFX7-NEXT: v_min_f32_e32 v3, v3, v4
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v4f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2864,6 +3570,15 @@ define <4 x float> @v_minimumnum_v4f32(<4 x float> %x, <4 x float> %y) {
}
define <4 x float> @v_minimumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) {
+; GFX7-LABEL: v_minimumnum_v4f32_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v4f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2913,6 +3628,17 @@ define <4 x float> @v_minimumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) {
}
define <2 x double> @v_minimumnum_v2f64(<2 x double> %x, <2 x double> %y) {
+; GFX7-LABEL: v_minimumnum_v2f64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v2f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2924,16 +3650,27 @@ define <2 x double> @v_minimumnum_v2f64(<2 x double> %x, <2 x double> %y) {
; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v2f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v2f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v2f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7]
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f64:
; GFX10: ; %bb.0:
@@ -2978,6 +3715,13 @@ define <2 x double> @v_minimumnum_v2f64(<2 x double> %x, <2 x double> %y) {
}
define <2 x double> @v_minimumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) {
+; GFX7-LABEL: v_minimumnum_v2f64_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v2f64_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3021,6 +3765,20 @@ define <2 x double> @v_minimumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) {
}
define <3 x double> @v_minimumnum_v3f64(<3 x double> %x, <3 x double> %y) {
+; GFX7-LABEL: v_minimumnum_v3f64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v3f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3035,19 +3793,33 @@ define <3 x double> @v_minimumnum_v3f64(<3 x double> %x, <3 x double> %y) {
; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v3f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v3f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v3f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9]
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-NEXT: v_max_f64 v[6:7], v[10:11], v[10:11]
+; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f64:
; GFX10: ; %bb.0:
@@ -3101,6 +3873,14 @@ define <3 x double> @v_minimumnum_v3f64(<3 x double> %x, <3 x double> %y) {
}
define <3 x double> @v_minimumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) {
+; GFX7-LABEL: v_minimumnum_v3f64_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v3f64_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3149,6 +3929,23 @@ define <3 x double> @v_minimumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) {
}
define <4 x double> @v_minimumnum_v4f64(<4 x double> %x, <4 x double> %y) {
+; GFX7-LABEL: v_minimumnum_v4f64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX7-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX7-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v4f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3166,22 +3963,39 @@ define <4 x double> @v_minimumnum_v4f64(<4 x double> %x, <4 x double> %y) {
; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v4f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX9-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
-; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
-; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
-; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v4f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX900-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX900-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v4f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11]
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13]
+; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[8:9]
+; GFX950-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15]
+; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-NEXT: v_min_f64 v[6:7], v[6:7], v[8:9]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f64:
; GFX10: ; %bb.0:
@@ -3244,6 +4058,15 @@ define <4 x double> @v_minimumnum_v4f64(<4 x double> %x, <4 x double> %y) {
}
define <4 x double> @v_minimumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) {
+; GFX7-LABEL: v_minimumnum_v4f64_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX7-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_minimumnum_v4f64_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
More information about the llvm-commits mailing list