[llvm] AMDGPU: Handle f32 minimum3/maximum3 pattern for gfx950 (PR #117737)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 26 11:44:37 PST 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117737
From d4aa2f6faefe5496501599d8dbf621e578adc4ef Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 23 May 2024 21:23:16 +0200
Subject: [PATCH] AMDGPU: Handle f32 minimum3/maximum3 pattern for gfx950
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 +-
llvm/test/CodeGen/AMDGPU/fmaximum3.ll | 146 ++++++++--------------
llvm/test/CodeGen/AMDGPU/fminimum3.ll | 146 ++++++++--------------
3 files changed, 102 insertions(+), 193 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 2e0f95161935a9..a24b6430378cc9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -13346,7 +13346,8 @@ static bool supportsMin3Max3(const GCNSubtarget &Subtarget, unsigned Opc,
return (VT == MVT::f32) || (VT == MVT::f16 && Subtarget.hasMin3Max3_16());
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
- return (VT == MVT::f32 || VT == MVT::f16) && Subtarget.hasIEEEMinMax3();
+ return (VT == MVT::f32 && Subtarget.hasMinimum3Maximum3F32()) ||
+ (VT == MVT::f16 && Subtarget.hasMinimum3Maximum3F16());
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index 209ae86b4dedce..e771e5801f2eda 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -31,8 +31,7 @@ define float @v_fmaximum3_f32(float %a, float %b, float %c) {
; GFX950-LABEL: v_fmaximum3_f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float %c)
@@ -67,8 +66,7 @@ define float @v_fmaximum3_f32_commute(float %a, float %b, float %c) {
; GFX950-LABEL: v_fmaximum3_f32_commute:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
-; GFX950-NEXT: v_maximum3_f32 v0, v2, v0, v0
+; GFX950-NEXT: v_maximum3_f32 v0, v2, v0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
%max1 = call float @llvm.maximum.f32(float %c, float %max0)
@@ -102,9 +100,9 @@ define amdgpu_ps i32 @s_fmaximum3_f32(float inreg %a, float inreg %b, float inre
;
; GFX950-LABEL: s_fmaximum3_f32:
; GFX950: ; %bb.0:
-; GFX950-NEXT: v_mov_b32_e32 v0, s0
-; GFX950-NEXT: v_maximum3_f32 v0, v0, s1, s1
-; GFX950-NEXT: v_maximum3_f32 v0, v0, s2, s2
+; GFX950-NEXT: v_mov_b32_e32 v0, s1
+; GFX950-NEXT: v_mov_b32_e32 v1, s2
+; GFX950-NEXT: v_maximum3_f32 v0, s0, v0, v1
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_readfirstlane_b32 s0, v0
; GFX950-NEXT: ; return to shader part epilog
@@ -143,8 +141,7 @@ define float @v_fmaximum3_f32_fabs0(float %a, float %b, float %c) {
; GFX950-LABEL: v_fmaximum3_f32_fabs0:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v0, |v0|, v1, v1
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_maximum3_f32 v0, |v0|, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
%max0 = call float @llvm.maximum.f32(float %a.fabs, float %b)
@@ -180,8 +177,7 @@ define float @v_fmaximum3_f32_fabs1(float %a, float %b, float %c) {
; GFX950-LABEL: v_fmaximum3_f32_fabs1:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v0, v0, |v1|, |v1|
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_maximum3_f32 v0, v0, |v1|, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%b.fabs = call float @llvm.fabs.f32(float %b)
%max0 = call float @llvm.maximum.f32(float %a, float %b.fabs)
@@ -217,8 +213,7 @@ define float @v_fmaximum3_f32_fabs2(float %a, float %b, float %c) {
; GFX950-LABEL: v_fmaximum3_f32_fabs2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
-; GFX950-NEXT: v_maximum3_f32 v0, v0, |v2|, |v2|
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, |v2|
; GFX950-NEXT: s_setpc_b64 s[30:31]
%c.fabs = call float @llvm.fabs.f32(float %c)
%max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -254,8 +249,7 @@ define float @v_fmaximum3_f32_fabs_all(float %a, float %b, float %c) {
; GFX950-LABEL: v_fmaximum3_f32_fabs_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v1|
-; GFX950-NEXT: v_maximum3_f32 v0, v0, |v2|, |v2|
+; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v2|
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
%b.fabs = call float @llvm.fabs.f32(float %b)
@@ -293,8 +287,7 @@ define float @v_fmaximum3_f32_fneg_all(float %a, float %b, float %c) {
; GFX950-LABEL: v_fmaximum3_f32_fneg_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v1, -v1
-; GFX950-NEXT: v_maximum3_f32 v0, v0, -v2, -v2
+; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v1, -v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
%b.fneg = fneg float %b
@@ -332,8 +325,7 @@ define float @v_fmaximum3_f32_fneg_fabs_all(float %a, float %b, float %c) {
; GFX950-LABEL: v_fmaximum3_f32_fneg_fabs_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v1|
-; GFX950-NEXT: v_maximum3_f32 v0, v0, -|v2|, -|v2|
+; GFX950-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v2|
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
%b.fabs = call float @llvm.fabs.f32(float %b)
@@ -374,8 +366,7 @@ define float @v_fmaximum3_f32_fneg0(float %a, float %b, float %c) {
; GFX950-LABEL: v_fmaximum3_f32_fneg0:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v0, -v0, v1, v1
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_maximum3_f32 v0, -v0, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
%max0 = call float @llvm.maximum.f32(float %a.fneg, float %b)
@@ -411,8 +402,7 @@ define float @v_fmaximum3_f32_fneg1(float %a, float %b, float %c) {
; GFX950-LABEL: v_fmaximum3_f32_fneg1:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v0, v0, -v1, -v1
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_maximum3_f32 v0, v0, -v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%b.fneg = fneg float %b
%max0 = call float @llvm.maximum.f32(float %a, float %b.fneg)
@@ -448,8 +438,7 @@ define float @v_fmaximum3_f32_fneg2(float %a, float %b, float %c) {
; GFX950-LABEL: v_fmaximum3_f32_fneg2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
-; GFX950-NEXT: v_maximum3_f32 v0, v0, -v2, -v2
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, -v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%c.fneg = fneg float %c
%max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -486,8 +475,7 @@ define float @v_fmaximum3_f32_const0(float %b, float %c) {
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: s_mov_b32 s0, 0x41000000
-; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, s0
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float 8.0, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float %c)
@@ -522,9 +510,8 @@ define float @v_fmaximum3_f32__const2(float %a, float %b) {
; GFX950-LABEL: v_fmaximum3_f32__const2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
; GFX950-NEXT: s_mov_b32 s0, 0x41000000
-; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, s0
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, s0
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float 8.0)
@@ -559,8 +546,7 @@ define float @v_fmaximum3_f32_inlineimm0(float %b, float %c) {
; GFX950-LABEL: v_fmaximum3_f32_inlineimm0:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v0, v0, 4.0, 4.0
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_maximum3_f32 v0, v0, 4.0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float 4.0, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float %c)
@@ -595,8 +581,7 @@ define float @v_fmaximum3_f32__inlineimm(float %a, float %b) {
; GFX950-LABEL: v_fmaximum3_f32__inlineimm:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
-; GFX950-NEXT: v_maximum3_f32 v0, v0, 4.0, 4.0
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, 4.0
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float 4.0)
@@ -634,9 +619,8 @@ define float @v_fmaximum3_f32_const1_const2(float %a) {
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: s_mov_b32 s0, 0x41000000
-; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, s0
-; GFX950-NEXT: s_mov_b32 s0, 0x41800000
-; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, s0
+; GFX950-NEXT: v_mov_b32_e32 v1, 0x41800000
+; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float 8.0)
%max1 = call float @llvm.maximum.f32(float %max0, float 16.0)
@@ -680,10 +664,8 @@ define <2 x float> @v_fmaximum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float
; GFX950-LABEL: v_fmaximum3_v2f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
-; GFX950-NEXT: v_maximum3_f32 v0, v4, v0, v0
-; GFX950-NEXT: v_maximum3_f32 v1, v5, v1, v1
+; GFX950-NEXT: v_maximum3_f32 v0, v4, v0, v2
+; GFX950-NEXT: v_maximum3_f32 v1, v5, v1, v3
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %c, <2 x float> %max0)
@@ -727,10 +709,8 @@ define <2 x float> @v_fmaximum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2
; GFX950-LABEL: v_fmaximum3_v2f32_commute:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4
-; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v4
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c)
@@ -774,10 +754,8 @@ define <2 x float> @v_fmaximum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b,
; GFX950-LABEL: v_fmaximum3_v2f32__fabs_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v1, |v1|, |v3|, |v3|
-; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v2|, |v2|
-; GFX950-NEXT: v_maximum3_f32 v0, v0, |v4|, |v4|
-; GFX950-NEXT: v_maximum3_f32 v1, v1, |v5|, |v5|
+; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v2|, |v4|
+; GFX950-NEXT: v_maximum3_f32 v1, |v1|, |v3|, |v5|
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
%b.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
@@ -824,10 +802,8 @@ define <2 x float> @v_fmaximum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b,
; GFX950-LABEL: v_fmaximum3_v2f32__fneg_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v1, -v1, -v3, -v3
-; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v2, -v2
-; GFX950-NEXT: v_maximum3_f32 v0, v0, -v4, -v4
-; GFX950-NEXT: v_maximum3_f32 v1, v1, -v5, -v5
+; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v2, -v4
+; GFX950-NEXT: v_maximum3_f32 v1, -v1, -v3, -v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <2 x float> %a
%b.fneg = fneg <2 x float> %b
@@ -874,10 +850,8 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c
; GFX950-LABEL: v_fmaximum3_v2f32__inlineimm1:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v1, v1, 2.0, 2.0
-; GFX950-NEXT: v_maximum3_f32 v0, v0, 2.0, 2.0
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
-; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3
+; GFX950-NEXT: v_maximum3_f32 v0, v0, 2.0, v2
+; GFX950-NEXT: v_maximum3_f32 v1, v1, 2.0, v3
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> <float 2.0, float 2.0>)
%max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c)
@@ -921,10 +895,8 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b
; GFX950-LABEL: v_fmaximum3_v2f32__inlineimm2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
-; GFX950-NEXT: v_maximum3_f32 v0, v0, 4.0, 4.0
-; GFX950-NEXT: v_maximum3_f32 v1, v1, 4.0, 4.0
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, 4.0
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, 4.0
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> <float 4.0, float 4.0>)
@@ -977,12 +949,9 @@ define <3 x float> @v_fmaximum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float
; GFX950-LABEL: v_fmaximum3_v3f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5
-; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3
-; GFX950-NEXT: v_maximum3_f32 v0, v6, v0, v0
-; GFX950-NEXT: v_maximum3_f32 v1, v7, v1, v1
-; GFX950-NEXT: v_maximum3_f32 v2, v8, v2, v2
+; GFX950-NEXT: v_maximum3_f32 v0, v6, v0, v3
+; GFX950-NEXT: v_maximum3_f32 v1, v7, v1, v4
+; GFX950-NEXT: v_maximum3_f32 v2, v8, v2, v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %c, <3 x float> %max0)
@@ -1035,12 +1004,9 @@ define <3 x float> @v_fmaximum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3
; GFX950-LABEL: v_fmaximum3_v3f32_commute:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5
-; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v6, v6
-; GFX950-NEXT: v_maximum3_f32 v1, v1, v7, v7
-; GFX950-NEXT: v_maximum3_f32 v2, v2, v8, v8
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v6
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v7
+; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v8
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c)
@@ -1093,12 +1059,9 @@ define <3 x float> @v_fmaximum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b,
; GFX950-LABEL: v_fmaximum3_v3f32__fabs_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v2, |v2|, |v5|, |v5|
-; GFX950-NEXT: v_maximum3_f32 v1, |v1|, |v4|, |v4|
-; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v3|, |v3|
-; GFX950-NEXT: v_maximum3_f32 v0, v0, |v6|, |v6|
-; GFX950-NEXT: v_maximum3_f32 v1, v1, |v7|, |v7|
-; GFX950-NEXT: v_maximum3_f32 v2, v2, |v8|, |v8|
+; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v3|, |v6|
+; GFX950-NEXT: v_maximum3_f32 v1, |v1|, |v4|, |v7|
+; GFX950-NEXT: v_maximum3_f32 v2, |v2|, |v5|, |v8|
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a)
%b.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %b)
@@ -1154,12 +1117,9 @@ define <3 x float> @v_fmaximum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b,
; GFX950-LABEL: v_fmaximum3_v3f32__fneg_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v2, -v2, -v5, -v5
-; GFX950-NEXT: v_maximum3_f32 v1, -v1, -v4, -v4
-; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v3, -v3
-; GFX950-NEXT: v_maximum3_f32 v0, v0, -v6, -v6
-; GFX950-NEXT: v_maximum3_f32 v1, v1, -v7, -v7
-; GFX950-NEXT: v_maximum3_f32 v2, v2, -v8, -v8
+; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v3, -v6
+; GFX950-NEXT: v_maximum3_f32 v1, -v1, -v4, -v7
+; GFX950-NEXT: v_maximum3_f32 v2, -v2, -v5, -v8
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <3 x float> %a
%b.fneg = fneg <3 x float> %b
@@ -1215,12 +1175,9 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c
; GFX950-LABEL: v_fmaximum3_v3f32__inlineimm1:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v2, v2, 2.0, 2.0
-; GFX950-NEXT: v_maximum3_f32 v1, v1, 2.0, 2.0
-; GFX950-NEXT: v_maximum3_f32 v0, v0, 2.0, 2.0
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3
-; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4
-; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5
+; GFX950-NEXT: v_maximum3_f32 v0, v0, 2.0, v3
+; GFX950-NEXT: v_maximum3_f32 v1, v1, 2.0, v4
+; GFX950-NEXT: v_maximum3_f32 v2, v2, 2.0, v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> <float 2.0, float 2.0, float 2.0>)
%max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c)
@@ -1273,12 +1230,9 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b
; GFX950-LABEL: v_fmaximum3_v3f32__inlineimm2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5
-; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4
-; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3
-; GFX950-NEXT: v_maximum3_f32 v0, v0, 4.0, 4.0
-; GFX950-NEXT: v_maximum3_f32 v1, v1, 4.0, 4.0
-; GFX950-NEXT: v_maximum3_f32 v2, v2, 4.0, 4.0
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, 4.0
+; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, 4.0
+; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, 4.0
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> <float 4.0, float 4.0, float 4.0>)
diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
index 000f6c190b9773..6a697aadbf3ba8 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
@@ -31,8 +31,7 @@ define float @v_fminimum3_f32(float %a, float %b, float %c) {
; GFX950-LABEL: v_fminimum3_f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
%max1 = call float @llvm.minimum.f32(float %max0, float %c)
@@ -67,8 +66,7 @@ define float @v_fminimum3_f32_commute(float %a, float %b, float %c) {
; GFX950-LABEL: v_fminimum3_f32_commute:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
-; GFX950-NEXT: v_minimum3_f32 v0, v2, v0, v0
+; GFX950-NEXT: v_minimum3_f32 v0, v2, v0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
%max1 = call float @llvm.minimum.f32(float %c, float %max0)
@@ -102,9 +100,9 @@ define amdgpu_ps i32 @s_fminimum3_f32(float inreg %a, float inreg %b, float inre
;
; GFX950-LABEL: s_fminimum3_f32:
; GFX950: ; %bb.0:
-; GFX950-NEXT: v_mov_b32_e32 v0, s0
-; GFX950-NEXT: v_minimum3_f32 v0, v0, s1, s1
-; GFX950-NEXT: v_minimum3_f32 v0, v0, s2, s2
+; GFX950-NEXT: v_mov_b32_e32 v0, s1
+; GFX950-NEXT: v_mov_b32_e32 v1, s2
+; GFX950-NEXT: v_minimum3_f32 v0, s0, v0, v1
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_readfirstlane_b32 s0, v0
; GFX950-NEXT: ; return to shader part epilog
@@ -143,8 +141,7 @@ define float @v_fminimum3_f32_fabs0(float %a, float %b, float %c) {
; GFX950-LABEL: v_fminimum3_f32_fabs0:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v0, |v0|, v1, v1
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_minimum3_f32 v0, |v0|, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
%max0 = call float @llvm.minimum.f32(float %a.fabs, float %b)
@@ -180,8 +177,7 @@ define float @v_fminimum3_f32_fabs1(float %a, float %b, float %c) {
; GFX950-LABEL: v_fminimum3_f32_fabs1:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v0, v0, |v1|, |v1|
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_minimum3_f32 v0, v0, |v1|, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%b.fabs = call float @llvm.fabs.f32(float %b)
%max0 = call float @llvm.minimum.f32(float %a, float %b.fabs)
@@ -217,8 +213,7 @@ define float @v_fminimum3_f32_fabs2(float %a, float %b, float %c) {
; GFX950-LABEL: v_fminimum3_f32_fabs2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
-; GFX950-NEXT: v_minimum3_f32 v0, v0, |v2|, |v2|
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, |v2|
; GFX950-NEXT: s_setpc_b64 s[30:31]
%c.fabs = call float @llvm.fabs.f32(float %c)
%max0 = call float @llvm.minimum.f32(float %a, float %b)
@@ -254,8 +249,7 @@ define float @v_fminimum3_f32_fabs_all(float %a, float %b, float %c) {
; GFX950-LABEL: v_fminimum3_f32_fabs_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v0, |v0|, |v1|, |v1|
-; GFX950-NEXT: v_minimum3_f32 v0, v0, |v2|, |v2|
+; GFX950-NEXT: v_minimum3_f32 v0, |v0|, |v1|, |v2|
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
%b.fabs = call float @llvm.fabs.f32(float %b)
@@ -293,8 +287,7 @@ define float @v_fminimum3_f32_fneg_all(float %a, float %b, float %c) {
; GFX950-LABEL: v_fminimum3_f32_fneg_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v0, -v0, -v1, -v1
-; GFX950-NEXT: v_minimum3_f32 v0, v0, -v2, -v2
+; GFX950-NEXT: v_minimum3_f32 v0, -v0, -v1, -v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
%b.fneg = fneg float %b
@@ -332,8 +325,7 @@ define float @v_fminimum3_f32_fneg_fabs_all(float %a, float %b, float %c) {
; GFX950-LABEL: v_fminimum3_f32_fneg_fabs_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v0, -|v0|, -|v1|, -|v1|
-; GFX950-NEXT: v_minimum3_f32 v0, v0, -|v2|, -|v2|
+; GFX950-NEXT: v_minimum3_f32 v0, -|v0|, -|v1|, -|v2|
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
%b.fabs = call float @llvm.fabs.f32(float %b)
@@ -374,8 +366,7 @@ define float @v_fminimum3_f32_fneg0(float %a, float %b, float %c) {
; GFX950-LABEL: v_fminimum3_f32_fneg0:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v0, -v0, v1, v1
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_minimum3_f32 v0, -v0, v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
%max0 = call float @llvm.minimum.f32(float %a.fneg, float %b)
@@ -411,8 +402,7 @@ define float @v_fminimum3_f32_fneg1(float %a, float %b, float %c) {
; GFX950-LABEL: v_fminimum3_f32_fneg1:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v0, v0, -v1, -v1
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: v_minimum3_f32 v0, v0, -v1, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%b.fneg = fneg float %b
%max0 = call float @llvm.minimum.f32(float %a, float %b.fneg)
@@ -448,8 +438,7 @@ define float @v_fminimum3_f32_fneg2(float %a, float %b, float %c) {
; GFX950-LABEL: v_fminimum3_f32_fneg2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
-; GFX950-NEXT: v_minimum3_f32 v0, v0, -v2, -v2
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, -v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
%c.fneg = fneg float %c
%max0 = call float @llvm.minimum.f32(float %a, float %b)
@@ -486,8 +475,7 @@ define float @v_fminimum3_f32_const0(float %b, float %c) {
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: s_mov_b32 s0, 0x41000000
-; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, s0
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float 8.0, float %b)
%max1 = call float @llvm.minimum.f32(float %max0, float %c)
@@ -522,9 +510,8 @@ define float @v_fminimum3_f32__const2(float %a, float %b) {
; GFX950-LABEL: v_fminimum3_f32__const2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
; GFX950-NEXT: s_mov_b32 s0, 0x41000000
-; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, s0
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, s0
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
%max1 = call float @llvm.minimum.f32(float %max0, float 8.0)
@@ -559,8 +546,7 @@ define float @v_fminimum3_f32_inlineimm0(float %b, float %c) {
; GFX950-LABEL: v_fminimum3_f32_inlineimm0:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v0, v0, 4.0, 4.0
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_minimum3_f32 v0, v0, 4.0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float 4.0, float %b)
%max1 = call float @llvm.minimum.f32(float %max0, float %c)
@@ -595,8 +581,7 @@ define float @v_fminimum3_f32__inlineimm(float %a, float %b) {
; GFX950-LABEL: v_fminimum3_f32__inlineimm:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1
-; GFX950-NEXT: v_minimum3_f32 v0, v0, 4.0, 4.0
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, 4.0
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float %b)
%max1 = call float @llvm.minimum.f32(float %max0, float 4.0)
@@ -634,9 +619,8 @@ define float @v_fminimum3_f32_const1_const2(float %a) {
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: s_mov_b32 s0, 0x41000000
-; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, s0
-; GFX950-NEXT: s_mov_b32 s0, 0x41800000
-; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, s0
+; GFX950-NEXT: v_mov_b32_e32 v1, 0x41800000
+; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.minimum.f32(float %a, float 8.0)
%max1 = call float @llvm.minimum.f32(float %max0, float 16.0)
@@ -680,10 +664,8 @@ define <2 x float> @v_fminimum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float
; GFX950-LABEL: v_fminimum3_v2f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
-; GFX950-NEXT: v_minimum3_f32 v0, v4, v0, v0
-; GFX950-NEXT: v_minimum3_f32 v1, v5, v1, v1
+; GFX950-NEXT: v_minimum3_f32 v0, v4, v0, v2
+; GFX950-NEXT: v_minimum3_f32 v1, v5, v1, v3
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %c, <2 x float> %max0)
@@ -727,10 +709,8 @@ define <2 x float> @v_fminimum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2
; GFX950-LABEL: v_fminimum3_v2f32_commute:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v4, v4
-; GFX950-NEXT: v_minimum3_f32 v1, v1, v5, v5
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v4
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %max0, <2 x float> %c)
@@ -774,10 +754,8 @@ define <2 x float> @v_fminimum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b,
; GFX950-LABEL: v_fminimum3_v2f32__fabs_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v1, |v1|, |v3|, |v3|
-; GFX950-NEXT: v_minimum3_f32 v0, |v0|, |v2|, |v2|
-; GFX950-NEXT: v_minimum3_f32 v0, v0, |v4|, |v4|
-; GFX950-NEXT: v_minimum3_f32 v1, v1, |v5|, |v5|
+; GFX950-NEXT: v_minimum3_f32 v0, |v0|, |v2|, |v4|
+; GFX950-NEXT: v_minimum3_f32 v1, |v1|, |v3|, |v5|
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
%b.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
@@ -824,10 +802,8 @@ define <2 x float> @v_fminimum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b,
; GFX950-LABEL: v_fminimum3_v2f32__fneg_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v1, -v1, -v3, -v3
-; GFX950-NEXT: v_minimum3_f32 v0, -v0, -v2, -v2
-; GFX950-NEXT: v_minimum3_f32 v0, v0, -v4, -v4
-; GFX950-NEXT: v_minimum3_f32 v1, v1, -v5, -v5
+; GFX950-NEXT: v_minimum3_f32 v0, -v0, -v2, -v4
+; GFX950-NEXT: v_minimum3_f32 v1, -v1, -v3, -v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <2 x float> %a
%b.fneg = fneg <2 x float> %b
@@ -874,10 +850,8 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c
; GFX950-LABEL: v_fminimum3_v2f32__inlineimm1:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v1, v1, 2.0, 2.0
-; GFX950-NEXT: v_minimum3_f32 v0, v0, 2.0, 2.0
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
-; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3
+; GFX950-NEXT: v_minimum3_f32 v0, v0, 2.0, v2
+; GFX950-NEXT: v_minimum3_f32 v1, v1, 2.0, v3
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> <float 2.0, float 2.0>)
%max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %max0, <2 x float> %c)
@@ -921,10 +895,8 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b
; GFX950-LABEL: v_fminimum3_v2f32__inlineimm2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2
-; GFX950-NEXT: v_minimum3_f32 v0, v0, 4.0, 4.0
-; GFX950-NEXT: v_minimum3_f32 v1, v1, 4.0, 4.0
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, 4.0
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, 4.0
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %max0, <2 x float> <float 4.0, float 4.0>)
@@ -977,12 +949,9 @@ define <3 x float> @v_fminimum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float
; GFX950-LABEL: v_fminimum3_v3f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5
-; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3
-; GFX950-NEXT: v_minimum3_f32 v0, v6, v0, v0
-; GFX950-NEXT: v_minimum3_f32 v1, v7, v1, v1
-; GFX950-NEXT: v_minimum3_f32 v2, v8, v2, v2
+; GFX950-NEXT: v_minimum3_f32 v0, v6, v0, v3
+; GFX950-NEXT: v_minimum3_f32 v1, v7, v1, v4
+; GFX950-NEXT: v_minimum3_f32 v2, v8, v2, v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %c, <3 x float> %max0)
@@ -1035,12 +1004,9 @@ define <3 x float> @v_fminimum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3
; GFX950-LABEL: v_fminimum3_v3f32_commute:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5
-; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v6, v6
-; GFX950-NEXT: v_minimum3_f32 v1, v1, v7, v7
-; GFX950-NEXT: v_minimum3_f32 v2, v2, v8, v8
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v6
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v7
+; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v8
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %max0, <3 x float> %c)
@@ -1093,12 +1059,9 @@ define <3 x float> @v_fminimum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b,
; GFX950-LABEL: v_fminimum3_v3f32__fabs_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v2, |v2|, |v5|, |v5|
-; GFX950-NEXT: v_minimum3_f32 v1, |v1|, |v4|, |v4|
-; GFX950-NEXT: v_minimum3_f32 v0, |v0|, |v3|, |v3|
-; GFX950-NEXT: v_minimum3_f32 v0, v0, |v6|, |v6|
-; GFX950-NEXT: v_minimum3_f32 v1, v1, |v7|, |v7|
-; GFX950-NEXT: v_minimum3_f32 v2, v2, |v8|, |v8|
+; GFX950-NEXT: v_minimum3_f32 v0, |v0|, |v3|, |v6|
+; GFX950-NEXT: v_minimum3_f32 v1, |v1|, |v4|, |v7|
+; GFX950-NEXT: v_minimum3_f32 v2, |v2|, |v5|, |v8|
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a)
%b.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %b)
@@ -1154,12 +1117,9 @@ define <3 x float> @v_fminimum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b,
; GFX950-LABEL: v_fminimum3_v3f32__fneg_all:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v2, -v2, -v5, -v5
-; GFX950-NEXT: v_minimum3_f32 v1, -v1, -v4, -v4
-; GFX950-NEXT: v_minimum3_f32 v0, -v0, -v3, -v3
-; GFX950-NEXT: v_minimum3_f32 v0, v0, -v6, -v6
-; GFX950-NEXT: v_minimum3_f32 v1, v1, -v7, -v7
-; GFX950-NEXT: v_minimum3_f32 v2, v2, -v8, -v8
+; GFX950-NEXT: v_minimum3_f32 v0, -v0, -v3, -v6
+; GFX950-NEXT: v_minimum3_f32 v1, -v1, -v4, -v7
+; GFX950-NEXT: v_minimum3_f32 v2, -v2, -v5, -v8
; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <3 x float> %a
%b.fneg = fneg <3 x float> %b
@@ -1215,12 +1175,9 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c
; GFX950-LABEL: v_fminimum3_v3f32__inlineimm1:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v2, v2, 2.0, 2.0
-; GFX950-NEXT: v_minimum3_f32 v1, v1, 2.0, 2.0
-; GFX950-NEXT: v_minimum3_f32 v0, v0, 2.0, 2.0
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3
-; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4
-; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5
+; GFX950-NEXT: v_minimum3_f32 v0, v0, 2.0, v3
+; GFX950-NEXT: v_minimum3_f32 v1, v1, 2.0, v4
+; GFX950-NEXT: v_minimum3_f32 v2, v2, 2.0, v5
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> <float 2.0, float 2.0, float 2.0>)
%max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %max0, <3 x float> %c)
@@ -1273,12 +1230,9 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b
; GFX950-LABEL: v_fminimum3_v3f32__inlineimm2:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5
-; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4
-; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3
-; GFX950-NEXT: v_minimum3_f32 v0, v0, 4.0, 4.0
-; GFX950-NEXT: v_minimum3_f32 v1, v1, 4.0, 4.0
-; GFX950-NEXT: v_minimum3_f32 v2, v2, 4.0, 4.0
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, 4.0
+; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, 4.0
+; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, 4.0
; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %max0, <3 x float> <float 4.0, float 4.0, float 4.0>)
More information about the llvm-commits mailing list