[llvm-branch-commits] [llvm] AMDGPU: Legalize fminimum and fmaximum f32 for gfx950 (PR #117634)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Nov 25 13:59:42 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-mc
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
Select to minimum3/maximum3. Leave f16/v2f16 for later,
since that case is complicated by only having the vector version.
---
Patch is 185.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117634.diff
6 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+4)
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+17)
- (modified) llvm/test/CodeGen/AMDGPU/fmaximum3.ll (+813-555)
- (modified) llvm/test/CodeGen/AMDGPU/fminimum3.ll (+813-555)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll (+165-256)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll (+165-256)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index d35bb15ac6566a..914b25245c95bf 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -855,6 +855,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FMINIMUM, ISD::FMAXIMUM},
{MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
Custom);
+ } else {
+ // FIXME: For nnan fmaximum, emit the fmaximum3 instead of fmaxnum
+ if (Subtarget->hasMinimum3Maximum3F32())
+ setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
}
setOperationAction(ISD::INTRINSIC_WO_CHAIN,
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 5d4d56e8b0ad22..2b207e008581b3 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1234,6 +1234,23 @@ def : IntClampPat<V_MQSAD_PK_U16_U8_e64, int_amdgcn_mqsad_pk_u16_u8>;
def : IntClampPat<V_QSAD_PK_U16_U8_e64, int_amdgcn_qsad_pk_u16_u8>;
def : IntClampPat<V_MQSAD_U32_U8_e64, int_amdgcn_mqsad_u32_u8>;
+//===----------------------------------------------------------------------===//
+// Floating-point operation Patterns
+//===----------------------------------------------------------------------===//
+
+// Implement fminimum(x, y) by using minimum3(x, y, y)
+class MinimumMaximumByMinimum3Maximum3<SDPatternOperator node, ValueType vt,
+ Instruction inst> : GCNPat<
+ (vt (node (VOP3Mods vt:$src0, i32:$src0_mods), (VOP3Mods vt:$src1, i32:$src1_mods))),
+ (inst $src0_mods, $src0, $src1_mods, $src1, $src1_mods, $src1)
+>;
+
+// Prefer the real 2 operand form if legal
+let SubtargetPredicate = HasMinimum3Maximum3F32, AddedComplexity = -1000 in {
+def : MinimumMaximumByMinimum3Maximum3<fminimum, f32, V_MINIMUM3_F32_e64>;
+def : MinimumMaximumByMinimum3Maximum3<fmaximum, f32, V_MAXIMUM3_F32_e64>;
+}
+
//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index 08122cd0d89eab..209ae86b4dedce 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -14,19 +14,26 @@ define float @v_fmaximum3_f32(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v3, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v3, v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, v0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float %c)
ret float %max1
@@ -43,19 +50,26 @@ define float @v_fmaximum3_f32_commute(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v2, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_commute:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v3, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, v2, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_commute:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v3, v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, v2, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_commute:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_maximum3_f32 v0, v2, v0, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%max0 = call float @llvm.maximum.f32(float %a, float %b)
%max1 = call float @llvm.maximum.f32(float %c, float %max0)
ret float %max1
@@ -70,21 +84,30 @@ define amdgpu_ps i32 @s_fmaximum3_f32(float inreg %a, float inreg %b, float inre
; GFX12-NEXT: v_readfirstlane_b32 s0, v0
; GFX12-NEXT: ; return to shader part epilog
;
-; GFX9-LABEL: s_fmaximum3_f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: v_mov_b32_e32 v0, s1
-; GFX9-NEXT: v_max_f32_e32 v1, s0, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, s2, v0
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_readfirstlane_b32 s0, v0
-; GFX9-NEXT: ; return to shader part epilog
+; GFX940-LABEL: s_fmaximum3_f32:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: v_mov_b32_e32 v0, s1
+; GFX940-NEXT: v_max_f32_e32 v1, s0, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, s2, v0
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_readfirstlane_b32 s0, v0
+; GFX940-NEXT: ; return to shader part epilog
+;
+; GFX950-LABEL: s_fmaximum3_f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: v_mov_b32_e32 v0, s0
+; GFX950-NEXT: v_maximum3_f32 v0, v0, s1, s1
+; GFX950-NEXT: v_maximum3_f32 v0, v0, s2, s2
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_readfirstlane_b32 s0, v0
+; GFX950-NEXT: ; return to shader part epilog
%max0 = call float @llvm.maximum.f32(float %a, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float %c)
%cast = bitcast float %max1 to i32
@@ -103,19 +126,26 @@ define float @v_fmaximum3_f32_fabs0(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, |v0|, v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_fabs0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v3, |v0|, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_fabs0:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e64 v3, |v0|, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, v0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_fabs0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, |v0|, v1, v1
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
%max0 = call float @llvm.maximum.f32(float %a.fabs, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float %c)
@@ -133,19 +163,26 @@ define float @v_fmaximum3_f32_fabs1(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, |v1|, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_fabs1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v3, v0, |v1|
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_fabs1:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e64 v3, v0, |v1|
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, v0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_fabs1:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, |v1|, |v1|
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%b.fabs = call float @llvm.fabs.f32(float %b)
%max0 = call float @llvm.maximum.f32(float %a, float %b.fabs)
%max1 = call float @llvm.maximum.f32(float %max0, float %c)
@@ -163,19 +200,26 @@ define float @v_fmaximum3_f32_fabs2(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, |v2|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_fabs2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v3, v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e64 v1, v0, |v2|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_fabs2:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v3, v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e64 v1, v0, |v2|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_fabs2:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1
+; GFX950-NEXT: v_maximum3_f32 v0, v0, |v2|, |v2|
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%c.fabs = call float @llvm.fabs.f32(float %c)
%max0 = call float @llvm.maximum.f32(float %a, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float %c.fabs)
@@ -193,19 +237,26 @@ define float @v_fmaximum3_f32_fabs_all(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v2|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_fabs_all:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v3, |v0|, |v1|
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e64 v1, v0, |v2|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_fabs_all:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e64 v3, |v0|, |v1|
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e64 v1, v0, |v2|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_fabs_all:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v1|
+; GFX950-NEXT: v_maximum3_f32 v0, v0, |v2|, |v2|
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
%b.fabs = call float @llvm.fabs.f32(float %b)
%c.fabs = call float @llvm.fabs.f32(float %c)
@@ -225,19 +276,26 @@ define float @v_fmaximum3_f32_fneg_all(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, -v0, -v1, -v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_fneg_all:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v3, -v0, -v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e64 v1, v0, -v2
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_fneg_all:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e64 v3, -v0, -v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e64 v1, v0, -v2
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_fneg_all:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v1, -v1
+; GFX950-NEXT: v_maximum3_f32 v0, v0, -v2, -v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
%b.fneg = fneg float %b
%c.fneg = fneg float %c
@@ -257,19 +315,26 @@ define float @v_fmaximum3_f32_fneg_fabs_all(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v2|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_fneg_fabs_all:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v3, -|v0|, -|v1|
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e64 v1, v0, -|v2|
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2|
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_fneg_fabs_all:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e64 v3, -|v0|, -|v1|
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e64 v1, v0, -|v2|
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2|
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_fneg_fabs_all:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v1|
+; GFX950-NEXT: v_maximum3_f32 v0, v0, -|v2|, -|v2|
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
%b.fabs = call float @llvm.fabs.f32(float %b)
%c.fabs = call float @llvm.fabs.f32(float %c)
@@ -292,19 +357,26 @@ define float @v_fmaximum3_f32_fneg0(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, -v0, v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_fneg0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v3, -v0, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_fneg0:
+; GFX940: ; %bb.0:
+; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT: v_max_f32_e64 v3, -v0, v1
+; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000
+; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, v0, v2
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_f32_fneg0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_maximum3_f32 v0, -v0, v1, v1
+; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2
+; GFX950-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
%max0 = call float @llvm.maximum.f32(float %a.fneg, float %b)
%max1 = call float @llvm.maximum.f32(float %max0, float %c)
@@ -322,19 +394,26 @@ define float @v_fmaximum3_f32_fneg1(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, -v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_fmaximum3_f32_fneg1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e64 v3, v0, -v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT: v_max_f32_e32 v1, v0, v2
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_f32_fneg1:
+; GFX940: ; %bb.0:
+; GFX940-NE...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/117634
More information about the llvm-branch-commits
mailing list