[llvm] DAG: Use flags in isLegalToCombineMinNumMaxNum (PR #93555)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 28 07:30:21 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
---
Patch is 73.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93555.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+11-8)
- (added) llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll (+1757)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 93d866384b482..2f4fdf5208d07 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11186,17 +11186,19 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
return SDValue();
}
-// FIXME: This should be checking for no signed zeros on individual operands, as
-// well as no nans.
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
- SDValue RHS,
+ SDValue RHS, const SDNodeFlags Flags,
const TargetLowering &TLI) {
- const TargetOptions &Options = DAG.getTarget().Options;
EVT VT = LHS.getValueType();
+ if (!VT.isFloatingPoint())
+ return false;
+
+ const TargetOptions &Options = DAG.getTarget().Options;
- return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
+ return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) &&
TLI.isProfitableToCombineMinNumMaxNum(VT) &&
- DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
+ (Flags.hasNoNaNs() ||
+ (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
}
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
@@ -11674,7 +11676,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// select (fcmp gt x, y), x, y -> fmaxnum x, y
//
// This is OK if we don't care what happens if either operand is a NaN.
- if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
if (SDValue FMinMax =
combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
return FMinMax;
@@ -12267,7 +12269,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// This is OK if we don't care about what happens if either operand is a
// NaN.
//
- if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
+ if (N0.hasOneUse() &&
+ isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
return FMinMax;
}
diff --git a/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
new file mode 100644
index 0000000000000..50a3336a7483c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
@@ -0,0 +1,1757 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
+
+; Test if fcmp+select patterns form min/max instructions when allowed
+; by flags.
+
+; TODO: Merge with fmin_legacy.ll/fmax_legacy.ll
+
+define float @v_test_fmin_legacy_ule_f32_safe(float %a, float %b) {
+; GFX7-LABEL: v_test_fmin_legacy_ule_f32_safe:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_fmin_legacy_ule_f32_safe:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_test_fmin_legacy_ule_f32_safe:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1
+; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %cmp = fcmp ule float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ ret float %val
+}
+
+define float @v_test_fmin_legacy_ule_f32_nnan_flag(float %a, float %b) {
+; GFX7-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1
+; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %cmp = fcmp ule float %a, %b
+ %val = select nnan i1 %cmp, float %a, float %b
+ ret float %val
+}
+
+define float @v_test_fmin_legacy_ule_f32_nsz_flag(float %a, float %b) {
+; GFX7-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1
+; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %cmp = fcmp ule float %a, %b
+ %val = select nsz i1 %cmp, float %a, float %b
+ ret float %val
+}
+
+define float @v_test_fmin_legacy_ule_f32_nnan_nsz_flag(float %a, float %b) {
+; GFX7-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %cmp = fcmp ule float %a, %b
+ %val = select nnan nsz i1 %cmp, float %a, float %b
+ ret float %val
+}
+
+define float @v_test_fmax_legacy_uge_f32_safe(float %a, float %b) {
+; GFX7-LABEL: v_test_fmax_legacy_uge_f32_safe:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_fmax_legacy_uge_f32_safe:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_test_fmax_legacy_uge_f32_safe:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1
+; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %cmp = fcmp uge float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ ret float %val
+}
+
+define float @v_test_fmax_legacy_uge_f32_nnan_flag(float %a, float %b) {
+; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1
+; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %cmp = fcmp uge float %a, %b
+ %val = select nnan i1 %cmp, float %a, float %b
+ ret float %val
+}
+
+define float @v_test_fmax_legacy_uge_f32_nsz_flag(float %a, float %b) {
+; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1
+; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %cmp = fcmp uge float %a, %b
+ %val = select nsz i1 %cmp, float %a, float %b
+ ret float %val
+}
+
+define float @v_test_fmax_legacy_uge_f32_nnan_nsz_flag(float %a, float %b) {
+; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %cmp = fcmp uge float %a, %b
+ %val = select nnan nsz i1 %cmp, float %a, float %b
+ ret float %val
+}
+
+define <2 x float> @v_test_fmin_legacy_ule_v2f32_safe(<2 x float> %a, <2 x float> %b) {
+; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_safe:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
+; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_safe:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_safe:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2
+; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3
+; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %cmp = fcmp ule <2 x float> %a, %b
+ %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
+ ret <2 x float> %val
+}
+
+define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_flag(<2 x float> %a, <2 x float> %b) {
+; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
+; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2
+; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3
+; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %cmp = fcmp ule <2 x float> %a, %b
+ %val = select nnan <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
+ ret <2 x float> %val
+}
+
+define <2 x float> @v_test_fmin_legacy_ule_v2f32_nsz_flag(<2 x float> %a, <2 x float> %b) {
+; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
+; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2
+; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3
+; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %cmp = fcmp ule <2 x float> %a, %b
+ %val = select nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
+ ret <2 x float> %val
+}
+
+define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag(<2 x float> %a, <2 x float> %b) {
+; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
+; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2
+; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3
+; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %cmp = fcmp ule <2 x float> %a, %b
+ %val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
+ ret <2 x float> %val
+}
+
+define <2 x float> @v_test_fmax_legacy_uge_v2f32_safe(<2 x float> %a, <2 x float> %b) {
+; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_safe:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
+; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_safe:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_safe:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2
+; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3
+; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %cmp = fcmp uge <2 x float> %a, %b
+ %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
+ ret <2 x float> %val
+}
+
+define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_flag(<2 x float> %a, <2 x float> %b) {
+; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
+; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2
+; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3
+; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %cmp = fcmp uge <2 x float> %a, %b
+ %val = select nnan <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
+ ret <2 x float> %val
+}
+
+define <2 x float> @v_test_fmax_legacy_uge_v2f32_nsz_flag(<2 x float> %a, <2 x float> %b) {
+; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
+; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2
+; GFX9-NEXT: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/93555
More information about the llvm-commits mailing list