[llvm] [SelectionDAG] Stop forming minnum/maxnum in SDAGBuilder (PR #187738)
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 20 09:57:19 PDT 2026
https://github.com/nikic created https://github.com/llvm/llvm-project/pull/187738
This is not the right place to do it, and the SPF-based code is not entirely correct. This removes the SPNB_RETURNS_ANY case; the SPNB_RETURNS_OTHER case is left to a follow-up.
>From d54cccadffc62571108d055660092ce218ec9fa0 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Thu, 5 Mar 2026 10:26:22 +0100
Subject: [PATCH] [SelectionDAG] Stop forming minnum/maxnum in SDAGBuilder
This is not the right place to do it, and the SPF-based code is
not entirely correct. This removes the SPNB_RETURNS_ANY case;
the SPNB_RETURNS_OTHER case is left to a follow-up.
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 33 +-
.../SelectionDAG/LegalizeFloatTypes.cpp | 2 +-
.../SelectionDAG/LegalizeVectorTypes.cpp | 5 +-
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3 +-
.../SelectionDAG/SelectionDAGBuilder.cpp | 20 +-
llvm/test/CodeGen/AArch64/arm64-fmax.ll | 9 +-
llvm/test/CodeGen/AArch64/select_fmf.ll | 20 +-
llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll | 4 +-
llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll | 4 +-
llvm/test/CodeGen/AMDGPU/reduction.ll | 24 +-
.../select-nsz-known-values-to-fmin-fmax.ll | 361 ++++++------------
11 files changed, 167 insertions(+), 318 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 82f8fd572bf19..a5bc3aba8ae56 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12228,15 +12228,17 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
}
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
- SDValue RHS, const SDNodeFlags Flags,
+ SDValue RHS,
+ const SDNodeFlags SelectFlags,
+ const SDNodeFlags CmpFlags,
const TargetLowering &TLI) {
EVT VT = LHS.getValueType();
if (!VT.isFloatingPoint())
return false;
- return Flags.hasNoSignedZeros() &&
+ return SelectFlags.hasNoSignedZeros() &&
TLI.isProfitableToCombineMinNumMaxNum(VT) &&
- (Flags.hasNoNaNs() ||
+ (SelectFlags.hasNoNaNs() || CmpFlags.hasNoNaNs() ||
(DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
}
@@ -12245,7 +12247,16 @@ static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
ISD::CondCode CC,
const TargetLowering &TLI,
SelectionDAG &DAG) {
- EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ EVT TransformVT = VT;
+ while (TLI.getTypeAction(*DAG.getContext(), TransformVT) !=
+ TargetLoweringBase::TypeLegal)
+ TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), TransformVT);
+
+ // We have checked nnan and nsz as pre-conditions for the transform.
+ SDNodeFlags Flags;
+ Flags.setNoNaNs(true);
+ Flags.setNoSignedZeros(true);
+
switch (CC) {
case ISD::SETOLT:
case ISD::SETOLE:
@@ -12258,11 +12269,11 @@ static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
// expanded in terms of it.
unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
- return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
+ return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS, Flags);
unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
- return DAG.getNode(Opcode, DL, VT, LHS, RHS);
+ return DAG.getNode(Opcode, DL, VT, LHS, RHS, Flags);
return SDValue();
}
case ISD::SETOGT:
@@ -12273,11 +12284,11 @@ static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
case ISD::SETUGE: {
unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
- return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
+ return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS, Flags);
unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
- return DAG.getNode(Opcode, DL, VT, LHS, RHS);
+ return DAG.getNode(Opcode, DL, VT, LHS, RHS, Flags);
return SDValue();
}
default:
@@ -12874,7 +12885,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// select (fcmp gt x, y), x, y -> fmaxnum x, y
//
// This is OK if we don't care what happens if either operand is a NaN.
- if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
+ if (N0.hasOneUse() &&
+ isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, N0->getFlags(), TLI))
if (SDValue FMinMax =
combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
return FMinMax;
@@ -13867,7 +13879,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// NaN.
//
if (N0.hasOneUse() &&
- isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
+ isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(),
+ N0->getFlags(), TLI)) {
if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
return FMinMax;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 25f4f75eaedea..06ca204dd09ba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -3027,7 +3027,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT(SDNode *N) {
SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2));
return DAG.getSelect(SDLoc(N), Op1.getValueType(), N->getOperand(0), Op1,
- Op2);
+ Op2, N->getFlags());
}
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT_CC(SDNode *N) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 564bf3b7f152e..6fad5c1a6acce 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -695,9 +695,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
if (BoolVT.bitsLT(CondVT))
Cond = DAG.getNode(ISD::TRUNCATE, SDLoc(N), BoolVT, Cond);
- return DAG.getSelect(SDLoc(N),
- LHS.getValueType(), Cond, LHS,
- GetScalarizedVector(N->getOperand(2)));
+ return DAG.getSelect(SDLoc(N), LHS.getValueType(), Cond, LHS,
+ GetScalarizedVector(N->getOperand(2)), N->getFlags());
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 8e06325c3a8d5..ff5ddd40d886c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -13798,7 +13798,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
break;
}
case ISD::VSELECT:
- Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands));
+ Scalars.push_back(
+ getNode(ISD::SELECT, dl, EltVT, Operands, N->getFlags()));
break;
case ISD::SHL:
case ISD::SRA:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 04b17b56b3d49..6c0d3b3af54fc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3872,17 +3872,13 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
- case SPNB_RETURNS_NAN: break;
+ case SPNB_RETURNS_ANY:
+ case SPNB_RETURNS_NAN:
+ break;
case SPNB_RETURNS_OTHER:
Opc = ISD::FMINIMUMNUM;
Flags.setNoSignedZeros(true);
break;
- case SPNB_RETURNS_ANY:
- if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ||
- (UseScalarMinMax &&
- TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType())))
- Opc = ISD::FMINNUM;
- break;
}
break;
case SPF_FMAXNUM:
@@ -3891,17 +3887,13 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
- case SPNB_RETURNS_NAN: break;
+ case SPNB_RETURNS_NAN:
+ case SPNB_RETURNS_ANY:
+ break;
case SPNB_RETURNS_OTHER:
Opc = ISD::FMAXIMUMNUM;
Flags.setNoSignedZeros(true);
break;
- case SPNB_RETURNS_ANY:
- if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ||
- (UseScalarMinMax &&
- TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType())))
- Opc = ISD::FMAXNUM;
- break;
}
break;
case SPF_NABS:
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax.ll b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
index f311139e193a5..ff1f60cfe8bbc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
@@ -5,7 +5,8 @@ define double @test_direct(float %in) {
; CHECK-LABEL: test_direct:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: fmaxnm s0, s0, s1
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: fcsel s0, s1, s0, lt
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp nnan olt float %in, 0.000000e+00
@@ -18,7 +19,8 @@ define double @test_cross(float %in) {
; CHECK-LABEL: test_cross:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: fminnm s0, s0, s1
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: fcsel s0, s0, s1, lt
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp nnan ult float %in, 0.000000e+00
@@ -33,7 +35,8 @@ define double @test_cross_fail_nan(float %in) {
; CHECK-LABEL: test_cross_fail_nan:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: fminnm s0, s0, s1
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: fcsel s0, s0, s1, lt
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp nnan olt float %in, 0.000000e+00
diff --git a/llvm/test/CodeGen/AArch64/select_fmf.ll b/llvm/test/CodeGen/AArch64/select_fmf.ll
index 88f517af65bb6..333e25fb5b824 100644
--- a/llvm/test/CodeGen/AArch64/select_fmf.ll
+++ b/llvm/test/CodeGen/AArch64/select_fmf.ll
@@ -8,12 +8,12 @@
define float @select_select_fold_select_and(float %w, float %x, float %y, float %z) {
; CHECK-SD-LABEL: select_select_fold_select_and:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fminnm s4, s1, s2
; CHECK-SD-NEXT: fcmp s1, s2
+; CHECK-SD-NEXT: fmov s4, #0.50000000
+; CHECK-SD-NEXT: fcsel s1, s1, s2, lt
; CHECK-SD-NEXT: fmaxnm s2, s0, s3
-; CHECK-SD-NEXT: fmov s1, #0.50000000
-; CHECK-SD-NEXT: fccmp s4, s0, #4, lt
-; CHECK-SD-NEXT: fadd s1, s0, s1
+; CHECK-SD-NEXT: fccmp s1, s0, #4, lt
+; CHECK-SD-NEXT: fadd s1, s0, s4
; CHECK-SD-NEXT: fcsel s2, s2, s0, gt
; CHECK-SD-NEXT: fadd s4, s1, s2
; CHECK-SD-NEXT: fcmp s4, s1
@@ -98,13 +98,13 @@ exit: ; preds = %if.end.i159.i.i, %if.then.i
define float @select_select_fold_select_or(float %w, float %x, float %y, float %z) {
; CHECK-SD-LABEL: select_select_fold_select_or:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fminnm s4, s1, s2
; CHECK-SD-NEXT: fcmp s1, s2
-; CHECK-SD-NEXT: fmaxnm s2, s0, s3
-; CHECK-SD-NEXT: fmov s1, #0.50000000
-; CHECK-SD-NEXT: fccmp s4, s0, #0, ge
-; CHECK-SD-NEXT: fadd s1, s0, s1
-; CHECK-SD-NEXT: fcsel s2, s0, s2, gt
+; CHECK-SD-NEXT: fcsel s1, s1, s2, lt
+; CHECK-SD-NEXT: fccmp s0, s3, #0, ge
+; CHECK-SD-NEXT: fmov s2, #0.50000000
+; CHECK-SD-NEXT: fccmp s1, s0, #0, le
+; CHECK-SD-NEXT: fadd s1, s0, s2
+; CHECK-SD-NEXT: fcsel s2, s0, s3, gt
; CHECK-SD-NEXT: fadd s4, s1, s2
; CHECK-SD-NEXT: fcmp s4, s1
; CHECK-SD-NEXT: b.le .LBB1_2
diff --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
index 1b494deca08aa..ec480c0cae1dc 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
@@ -320,8 +320,8 @@ define <3 x half> @test_fmax_legacy_ugt_v3f16_fast(<3 x half> %a, <3 x half> %b)
; SI-NEXT: v_max_f32_e32 v2, v4, v5
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; SI-NEXT: v_or_b32_e32 v0, v0, v2
; SI-NEXT: s_setpc_b64 s[30:31]
;
@@ -771,9 +771,9 @@ define <8 x half> @test_fmax_legacy_ugt_v8f16_fast(<8 x half> %a, <8 x half> %b)
; SI-NEXT: v_cvt_f16_f32_e32 v6, v6
; SI-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-NEXT: v_lshlrev_b32_e32 v4, 16, v4
+; SI-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; SI-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; SI-NEXT: v_lshlrev_b32_e32 v7, 16, v7
-; SI-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; SI-NEXT: v_or_b32_e32 v0, v0, v7
; SI-NEXT: v_or_b32_e32 v1, v1, v6
; SI-NEXT: v_or_b32_e32 v2, v2, v5
diff --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
index 8c9dccceff192..639dba0f20f1e 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
@@ -321,8 +321,8 @@ define <3 x half> @test_fmin_legacy_ule_v3f16_fast(<3 x half> %a, <3 x half> %b)
; SI-NEXT: v_min_f32_e32 v2, v4, v5
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; SI-NEXT: v_or_b32_e32 v0, v0, v2
; SI-NEXT: s_setpc_b64 s[30:31]
;
@@ -772,9 +772,9 @@ define <8 x half> @test_fmin_legacy_ule_v8f16_fast(<8 x half> %a, <8 x half> %b)
; SI-NEXT: v_cvt_f16_f32_e32 v6, v6
; SI-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-NEXT: v_lshlrev_b32_e32 v4, 16, v4
+; SI-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; SI-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; SI-NEXT: v_lshlrev_b32_e32 v7, 16, v7
-; SI-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; SI-NEXT: v_or_b32_e32 v0, v0, v7
; SI-NEXT: v_or_b32_e32 v1, v1, v6
; SI-NEXT: v_or_b32_e32 v2, v2, v5
diff --git a/llvm/test/CodeGen/AMDGPU/reduction.ll b/llvm/test/CodeGen/AMDGPU/reduction.ll
index 291eccd405b8a..0dd2b0ba5550b 100644
--- a/llvm/test/CodeGen/AMDGPU/reduction.ll
+++ b/llvm/test/CodeGen/AMDGPU/reduction.ll
@@ -619,8 +619,6 @@ define half @reduction_fast_max_pattern_v4f16(<4 x half> %vec4) {
; GFX9-LABEL: reduction_fast_max_pattern_v4f16:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
; GFX9-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -628,21 +626,17 @@ define half @reduction_fast_max_pattern_v4f16(<4 x half> %vec4) {
; VI-LABEL: reduction_fast_max_pattern_v4f16:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT: v_max_f16_e32 v1, v1, v1
-; VI-NEXT: v_max_f16_e32 v0, v0, v0
-; VI-NEXT: v_max_f16_e32 v2, v3, v2
+; VI-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NEXT: v_max_f16_e32 v0, v0, v1
; VI-NEXT: v_max_f16_e32 v0, v0, v2
; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
%rdx.minmax.cmp = fcmp nnan nsz ogt <4 x half> %vec4, %rdx.shuf
- %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
+ %rdx.minmax.select = select nnan nsz <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%rdx.minmax.cmp2 = fcmp nnan nsz ogt <4 x half> %rdx.minmax.select, %rdx.shuf1
- %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
+ %rdx.minmax.select3 = select nnan nsz <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
%res = extractelement <4 x half> %rdx.minmax.select3, i32 0
ret half %res
}
@@ -653,8 +647,6 @@ define half @reduction_fast_min_pattern_v4f16(<4 x half> %vec4) {
; GFX9-LABEL: reduction_fast_min_pattern_v4f16:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
; GFX9-NEXT: v_min_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -662,21 +654,17 @@ define half @reduction_fast_min_pattern_v4f16(<4 x half> %vec4) {
; VI-LABEL: reduction_fast_min_pattern_v4f16:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT: v_max_f16_e32 v1, v1, v1
-; VI-NEXT: v_max_f16_e32 v0, v0, v0
-; VI-NEXT: v_min_f16_e32 v2, v3, v2
+; VI-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NEXT: v_min_f16_e32 v0, v0, v1
; VI-NEXT: v_min_f16_e32 v0, v0, v2
; VI-NEXT: s_setpc_b64 s[30:31]
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
%rdx.minmax.cmp = fcmp nnan nsz olt <4 x half> %vec4, %rdx.shuf
- %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
+ %rdx.minmax.select = select nnan nsz <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%rdx.minmax.cmp2 = fcmp nnan nsz olt <4 x half> %rdx.minmax.select, %rdx.shuf1
- %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
+ %rdx.minmax.select3 = select nnan nsz <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
%res = extractelement <4 x half> %rdx.minmax.select3, i32 0
ret half %res
}
diff --git a/llvm/test/CodeGen/AMDGPU/select-nsz-known-values-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/select-nsz-known-values-to-fmin-fmax.ll
index 65af7749037e2..eb3106b5b740e 100644
--- a/llvm/test/CodeGen/AMDGPU/select-nsz-known-values-to-fmin-fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-nsz-known-values-to-fmin-fmax.ll
@@ -155,17 +155,16 @@ define <2 x half> @v_max_pat_v2f16_oge(<2 x half> nofpclass(nan) %a, <2 x half>
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v0
-; GFX7-NEXT: v_cmp_ge_f32_e32 vcc, v5, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX7-NEXT: v_cmp_ge_f32_e32 vcc, v7, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v2
+; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_max_pat_v2f16_oge:
@@ -200,17 +199,16 @@ define <2 x half> @v_min_pat_v2f16_olt(<2 x half> nofpclass(nan) %a, <2 x half>
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v0
-; GFX7-NEXT: v_cmp_lt_f32_e32 vcc, v5, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX7-NEXT: v_cmp_lt_f32_e32 vcc, v7, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v2
+; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_min_pat_v2f16_olt:
@@ -245,17 +243,16 @@ define <2 x half> @v_max_pat_v2f16_uge(<2 x half> nofpclass(nan) %a, <2 x half>
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v0
-; GFX7-NEXT: v_cmp_ge_f32_e32 vcc, v5, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX7-NEXT: v_cmp_ge_f32_e32 vcc, v7, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v2
+; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_max_pat_v2f16_uge:
@@ -290,17 +287,16 @@ define <2 x half> @v_min_pat_v2f16_ult(<2 x half> nofpclass(nan) %a, <2 x half>
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v0
-; GFX7-NEXT: v_cmp_lt_f32_e32 vcc, v5, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX7-NEXT: v_cmp_lt_f32_e32 vcc, v7, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v2
+; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_min_pat_v2f16_ult:
@@ -465,37 +461,21 @@ define <2 x float> @v_max_pat_v2f32_oge(<2 x float> nofpclass(nan) %a, <2 x floa
; GFX7-LABEL: v_max_pat_v2f32_oge:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_max_legacy_f32_e32 v1, v1, v3
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_max_pat_v2f32_oge:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
-; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX900-NEXT: v_cmp_ge_f32_e32 vcc, v1, v3
-; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_max_pat_v2f32_oge:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX950-NEXT: v_cmp_ge_f32_e32 vcc, v1, v3
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX950-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_max_pat_v2f32_oge:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_max_pat_v2f32_oge:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_ge_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_ge_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_max_pat_v2f32_oge:
@@ -505,12 +485,7 @@ define <2 x float> @v_max_pat_v2f32_oge(<2 x float> nofpclass(nan) %a, <2 x floa
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_cmp_ge_f32_e32 vcc_lo, v0, v2
-; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
-; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX12-NEXT: v_cmp_ge_f32_e32 vcc_lo, v1, v3
-; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
-; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp oge <2 x float> %a, %b
%select = select nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
@@ -521,37 +496,21 @@ define <2 x float> @v_min_pat_v2f32_olt(<2 x float> nofpclass(nan) %a, <2 x floa
; GFX7-LABEL: v_min_pat_v2f32_olt:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_min_legacy_f32_e32 v1, v1, v3
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_min_pat_v2f32_olt:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_min_pat_v2f32_olt:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX950-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_min_pat_v2f32_olt:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_min_pat_v2f32_olt:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_min_pat_v2f32_olt:
@@ -561,12 +520,7 @@ define <2 x float> @v_min_pat_v2f32_olt(<2 x float> nofpclass(nan) %a, <2 x floa
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
-; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX12-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
-; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp olt <2 x float> %a, %b
%select = select nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
@@ -577,37 +531,21 @@ define <2 x float> @v_max_pat_v2f32_uge(<2 x float> nofpclass(nan) %a, <2 x floa
; GFX7-LABEL: v_max_pat_v2f32_uge:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_max_legacy_f32_e32 v1, v1, v3
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_max_pat_v2f32_uge:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
-; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX900-NEXT: v_cmp_ge_f32_e32 vcc, v1, v3
-; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_max_pat_v2f32_uge:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX950-NEXT: v_cmp_ge_f32_e32 vcc, v1, v3
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX950-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_max_pat_v2f32_uge:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_max_pat_v2f32_uge:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_ge_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_ge_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_max_pat_v2f32_uge:
@@ -617,12 +555,7 @@ define <2 x float> @v_max_pat_v2f32_uge(<2 x float> nofpclass(nan) %a, <2 x floa
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_cmp_ge_f32_e32 vcc_lo, v0, v2
-; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
-; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX12-NEXT: v_cmp_ge_f32_e32 vcc_lo, v1, v3
-; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
-; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp uge <2 x float> %a, %b
%select = select nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
@@ -633,37 +566,21 @@ define <2 x float> @v_min_pat_v2f32_ult(<2 x float> nofpclass(nan) %a, <2 x floa
; GFX7-LABEL: v_min_pat_v2f32_ult:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_min_legacy_f32_e32 v1, v1, v3
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_min_pat_v2f32_ult:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_min_pat_v2f32_ult:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX950-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_min_pat_v2f32_ult:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_min_pat_v2f32_ult:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_min_pat_v2f32_ult:
@@ -673,12 +590,7 @@ define <2 x float> @v_min_pat_v2f32_ult(<2 x float> nofpclass(nan) %a, <2 x floa
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
-; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX12-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
-; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp ult <2 x float> %a, %b
%select = select nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
@@ -821,47 +733,22 @@ define <2 x double> @v_max_pat_v2f64_oge(<2 x double> nofpclass(nan) %a, <2 x do
; GFX7-LABEL: v_max_pat_v2f64_oge:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_ge_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_ge_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_max_pat_v2f64_oge:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_cmp_ge_f64_e32 vcc, v[0:1], v[4:5]
-; GFX900-NEXT: v_cmp_ge_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX900-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_max_pat_v2f64_oge:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_cmp_ge_f64_e32 vcc, v[0:1], v[4:5]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX950-NEXT: v_cmp_ge_f64_e32 vcc, v[2:3], v[6:7]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX950-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_max_pat_v2f64_oge:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_max_pat_v2f64_oge:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_ge_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_ge_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
+; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_max_pat_v2f64_oge:
@@ -871,14 +758,8 @@ define <2 x double> @v_max_pat_v2f64_oge(<2 x double> nofpclass(nan) %a, <2 x do
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_cmp_ge_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX12-NEXT: v_cmp_ge_f64_e64 s0, v[2:3], v[6:7]
-; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
-; GFX12-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
-; GFX12-NEXT: s_wait_alu depctr_va_sdst(0)
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT: v_cndmask_b32_e64 v2, v6, v2, s0
-; GFX12-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
+; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[4:5]
+; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[6:7]
; GFX12-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp oge <2 x double> %a, %b
%select = select nsz <2 x i1> %cmp, <2 x double> %a, <2 x double> %b
@@ -889,47 +770,22 @@ define <2 x double> @v_min_pat_v2f64_olt(<2 x double> nofpclass(nan) %a, <2 x do
; GFX7-LABEL: v_min_pat_v2f64_olt:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_min_pat_v2f64_olt:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX900-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX900-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX950-LABEL: v_min_pat_v2f64_olt:
-; GFX950: ; %bb.0:
-; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX950-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX950-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_min_pat_v2f64_olt:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_min_pat_v2f64_olt:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
+; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_min_pat_v2f64_olt:
@@ -939,14 +795,8 @@ define <2 x double> @v_min_pat_v2f64_olt(<2 x double> nofpclass(nan) %a, <2 x do
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX12-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[6:7]
-; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
-; GFX12-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
-; GFX12-NEXT: s_wait_alu depctr_va_sdst(0)
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT: v_cndmask_b32_e64 v2, v6, v2, s0
-; GFX12-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
+; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[4:5]
+; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[6:7]
; GFX12-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp olt <2 x double> %a, %b
%select = select nsz <2 x i1> %cmp, <2 x double> %a, <2 x double> %b
@@ -954,3 +804,6 @@ define <2 x double> @v_min_pat_v2f64_olt(<2 x double> nofpclass(nan) %a, <2 x do
}
attributes #0 = { nounwind }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX900: {{.*}}
+; GFX950: {{.*}}
More information about the llvm-commits
mailing list