[llvm] AMDGPU/GlobalISel: Start legalizing minimumnum and maximumnum (PR #140900)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 21 06:46:53 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
This is the bare minimum to get the intrinsic to compile for AMDGPU,
and it is not optimal. We need to follow the existing G_FMINNUM/G_FMAXNUM
handling more closely, using custom lowering to handle the IEEE=0 case better.
For now, just reuse the existing lowering that implements the old semantics
of G_FMINNUM/G_FMAXNUM. This does not change how G_FMINNUM/G_FMAXNUM are
treated, nor does it try to handle the general expansion without an underlying
min/max variant (or with G_FMINIMUM/G_FMAXIMUM).
---
Patch is 1.06 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140900.diff
4 Files Affected:
- (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+29-2)
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+8)
- (modified) llvm/test/CodeGen/AMDGPU/maximumnum.ll (+7661-3901)
- (modified) llvm/test/CodeGen/AMDGPU/minimumnum.ll (+7523-3848)
``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 72f2ba75c927e..7b18a98d7f3ca 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3221,6 +3221,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FMAXNUM_IEEE:
case TargetOpcode::G_FMINIMUM:
case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FMINIMUMNUM:
+ case TargetOpcode::G_FMAXIMUMNUM:
case TargetOpcode::G_FDIV:
case TargetOpcode::G_FREM:
case TargetOpcode::G_FCEIL:
@@ -4591,6 +4593,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerFCopySign(MI);
case G_FMINNUM:
case G_FMAXNUM:
+ case G_FMINIMUMNUM:
+ case G_FMAXIMUMNUM:
return lowerFMinNumMaxNum(MI);
case G_MERGE_VALUES:
return lowerMergeValues(MI);
@@ -5379,6 +5383,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FMAXNUM_IEEE:
case G_FMINIMUM:
case G_FMAXIMUM:
+ case G_FMINIMUMNUM:
+ case G_FMAXIMUMNUM:
case G_FSHL:
case G_FSHR:
case G_ROTL:
@@ -6090,6 +6096,8 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_FMAXNUM_IEEE:
case TargetOpcode::G_FMINIMUM:
case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FMINIMUMNUM:
+ case TargetOpcode::G_FMAXIMUMNUM:
case TargetOpcode::G_STRICT_FADD:
case TargetOpcode::G_STRICT_FSUB:
case TargetOpcode::G_STRICT_FMUL:
@@ -8139,8 +8147,27 @@ LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
- unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
- TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
+ // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
+ // identical handling. fminimumnum/fmaximumnum also need a path that does not
+ // depend on fminnum/fmaxnum.
+
+ unsigned NewOp;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_FMINNUM:
+ NewOp = TargetOpcode::G_FMINNUM_IEEE;
+ break;
+ case TargetOpcode::G_FMINIMUMNUM:
+ NewOp = TargetOpcode::G_FMINNUM;
+ break;
+ case TargetOpcode::G_FMAXNUM:
+ NewOp = TargetOpcode::G_FMAXNUM_IEEE;
+ break;
+ case TargetOpcode::G_FMAXIMUMNUM:
+ NewOp = TargetOpcode::G_FMAXNUM;
+ break;
+ default:
+ llvm_unreachable("unexpected min/max opcode");
+ }
auto [Dst, Src0, Src1] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Dst);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 7bb461e0a239f..86e8e7e241b35 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -960,6 +960,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
auto &MinNumMaxNum = getActionDefinitionsBuilder({
G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
+ // TODO: These should be custom lowered and are directly legal with IEEE=0
+ auto &MinimumNumMaximumNum =
+ getActionDefinitionsBuilder({G_FMINIMUMNUM, G_FMAXIMUMNUM});
+
if (ST.hasVOP3PInsts()) {
MinNumMaxNum.customFor(FPTypesPK16)
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
@@ -976,6 +980,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0);
}
+ MinimumNumMaximumNum.lower();
+
if (ST.hasVOP3PInsts())
FPOpActions.clampMaxNumElementsStrict(0, S16, 2);
@@ -2102,6 +2108,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
}
+
+
getActionDefinitionsBuilder({G_MEMCPY, G_MEMCPY_INLINE, G_MEMMOVE, G_MEMSET})
.lower();
diff --git a/llvm/test/CodeGen/AMDGPU/maximumnum.ll b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
index e299f959edb08..c45d86ce306e7 100644
--- a/llvm/test/CodeGen/AMDGPU/maximumnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
@@ -1,106 +1,209 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-SDAG,GFX900-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-GISEL,GFX900-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-SDAG,GFX950-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-GISEL,GFX950-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16,GFX11-SDAG,GFX11-TRUE16-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16,GFX11-GISEL,GFX11-TRUE16-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-SDAG,GFX11-FAKE16-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-GISEL,GFX11-FAKE16-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-SDAG,GFX12-TRUE16-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-GISEL,GFX12-TRUE16-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16,GFX12-SDAG,GFX12-FAKE16-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16,GFX12-GISEL,GFX12-FAKE16-GISEL %s
define half @v_maximumnum_f16(half %x, half %y) {
-; GFX7-LABEL: v_maximumnum_f16:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_maximumnum_f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_maximumnum_f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_maximumnum_f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-TRUE16-LABEL: v_maximumnum_f16:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_maximumnum_f16:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-TRUE16-LABEL: v_maximumnum_f16:
-; GFX12-TRUE16: ; %bb.0:
-; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
-; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-FAKE16-LABEL: v_maximumnum_f16:
-; GFX12-FAKE16: ; %bb.0:
-; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
-; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
-; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
-; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_maximumnum_f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_maximumnum_f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: v_maximumnum_f16:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_f16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_maximumnum_f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16:
+; GFX12-TRUE16-SDAG: ; %bb.0:
+; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16:
+; GFX12-TRUE16-GISEL: ; %bb.0:
+; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16:
+; GFX12-FAKE16-SDAG: ; %bb.0:
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16:
+; GFX12-FAKE16-GISEL: ; %bb.0:
+; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.maximumnum.f16(half %x, half %y)
ret half %result
}
define half @v_maximumnum_f16_nnan(half %x, half %y) {
-; GFX7-LABEL: v_maximumnum_f16_nnan:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f16_nnan:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f16_nnan:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximumnum_f16_nnan:
; GFX8: ; %bb.0:
@@ -156,13 +259,22 @@ define half @v_maximumnum_f16_nnan(half %x, half %y) {
}
define half @v_maximumnum_f16_1.0(half %x) {
-; GFX7-LABEL: v_maximumnum_f16_1.0:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_max_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: v_maximumnum_f16_1.0:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_max_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: v_maximumnum_f16_1.0:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, 1.0
+; GF...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/140900
More information about the llvm-commits
mailing list