[llvm] AMDGPU: Directly select minimumnum/maximumnum with ieee_mode=0 (PR #141903)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 29 00:03:59 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
With IEEE mode disabled, the hardware min/max instructions already follow
the IR rules for minimumnum/maximumnum, so we can avoid canonicalizing the
inputs. We lose the quieting of a signaling NaN when both inputs are NaNs,
but that quieting is only required with strictfp.
---
Patch is 230.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141903.diff
9 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructions.td (+2)
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+15-10)
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+2)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+29-7)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.h (+1)
- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+46)
- (modified) llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll (+12-30)
- (modified) llvm/test/CodeGen/AMDGPU/maximumnum.ll (+629-813)
- (modified) llvm/test/CodeGen/AMDGPU/minimumnum.ll (+629-813)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 18a948d68e97b..7a50923ffedc6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -92,6 +92,8 @@ def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().F
def NoFP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
def NoFP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals == DenormalMode::getPreserveSign()">;
def NoFP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
+def IEEEModeEnabled : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().IEEE">;
+def IEEEModeDisabled : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().IEEE">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 177750b639c67..ae530d35eff00 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -957,12 +957,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
FPOpActions.clampMaxNumElementsStrict(0, S32, 2);
}
- auto &MinNumMaxNum = getActionDefinitionsBuilder({
- G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
-
- // TODO: These should be custom lowered and are directly legal with IEEE=0
- auto &MinimumNumMaximumNum =
- getActionDefinitionsBuilder({G_FMINIMUMNUM, G_FMAXIMUMNUM});
+ auto &MinNumMaxNum = getActionDefinitionsBuilder(
+ {G_FMINNUM, G_FMAXNUM, G_FMINIMUMNUM, G_FMAXIMUMNUM, G_FMINNUM_IEEE,
+ G_FMAXNUM_IEEE});
if (ST.hasVOP3PInsts()) {
MinNumMaxNum.customFor(FPTypesPK16)
@@ -980,8 +977,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0);
}
- MinimumNumMaximumNum.lower();
-
if (ST.hasVOP3PInsts())
FPOpActions.clampMaxNumElementsStrict(0, S16, 2);
@@ -2160,6 +2155,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(
return legalizeFPTOI(MI, MRI, B, false);
case TargetOpcode::G_FMINNUM:
case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINIMUMNUM:
+ case TargetOpcode::G_FMAXIMUMNUM:
case TargetOpcode::G_FMINNUM_IEEE:
case TargetOpcode::G_FMAXNUM_IEEE:
return legalizeMinNumMaxNum(Helper, MI);
@@ -2739,9 +2736,17 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(LegalizerHelper &Helper,
MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;
// With ieee_mode disabled, the instructions have the correct behavior
- // already for G_FMINNUM/G_FMAXNUM
- if (!MFI->getMode().IEEE)
+ // already for G_FMINIMUMNUM/G_FMAXIMUMNUM.
+ //
+ // FIXME: G_FMINNUM/G_FMAXNUM should match the behavior with ieee_mode
+ // enabled.
+ if (!MFI->getMode().IEEE) {
+ if (MI.getOpcode() == AMDGPU::G_FMINIMUMNUM ||
+ MI.getOpcode() == AMDGPU::G_FMAXIMUMNUM)
+ return true;
+
return !IsIEEEOp;
+ }
if (IsIEEEOp)
return true;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index dd7aef8f0c583..4391a48ff2b68 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4009,6 +4009,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_FMAXNUM:
case AMDGPU::G_FMINIMUM:
case AMDGPU::G_FMAXIMUM:
+ case AMDGPU::G_FMINIMUMNUM:
+ case AMDGPU::G_FMAXIMUMNUM:
case AMDGPU::G_INTRINSIC_TRUNC:
case AMDGPU::G_STRICT_FADD:
case AMDGPU::G_STRICT_FSUB:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 74ca3e43fce3a..f161e5185e196 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -523,8 +523,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, {MVT::i16, MVT::i32},
Legal);
- setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, {MVT::f32, MVT::f64},
- Custom);
+ setOperationAction(
+ {ISD::FMINNUM, ISD::FMAXNUM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM},
+ {MVT::f32, MVT::f64}, Custom);
// These are really only legal for ieee_mode functions. We should be avoiding
// them for functions that don't have ieee_mode enabled, so just say they are
@@ -756,7 +757,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// allows matching fneg (fabs x) patterns)
setOperationAction(ISD::FABS, MVT::v2f16, Legal);
- setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, MVT::f16, Custom);
+ setOperationAction(
+ {ISD::FMAXNUM, ISD::FMINNUM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM},
+ MVT::f16, Custom);
setOperationAction({ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE}, MVT::f16, Legal);
setOperationAction({ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE, ISD::FMINIMUMNUM,
@@ -810,8 +813,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FCANONICALIZE},
VT, Custom);
- setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, {MVT::v2f16, MVT::v4f16},
- Custom);
+ setOperationAction(
+ {ISD::FMAXNUM, ISD::FMINNUM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM},
+ {MVT::v2f16, MVT::v4f16}, Custom);
setOperationAction(ISD::FEXP, MVT::v2f16, Custom);
setOperationAction(ISD::SELECT, {MVT::v4i16, MVT::v4f16, MVT::v4bf16},
@@ -6057,6 +6061,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FMINNUM:
case ISD::FMAXNUM:
return lowerFMINNUM_FMAXNUM(Op, DAG);
+ case ISD::FMINIMUMNUM:
+ case ISD::FMAXIMUMNUM:
+ return lowerFMINIMUMNUM_FMAXIMUMNUM(Op, DAG);
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
return lowerFMINIMUM_FMAXIMUM(Op, DAG);
@@ -6081,8 +6088,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FMUL:
case ISD::FMINNUM_IEEE:
case ISD::FMAXNUM_IEEE:
- case ISD::FMINIMUMNUM:
- case ISD::FMAXIMUMNUM:
case ISD::UADDSAT:
case ISD::USUBSAT:
case ISD::SADDSAT:
@@ -6967,6 +6972,23 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
return Op;
}
+SDValue
+SITargetLowering::lowerFMINIMUMNUM_FMAXIMUMNUM(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ bool IsIEEEMode = Info->getMode().IEEE;
+
+ if (IsIEEEMode)
+ return expandFMINIMUMNUM_FMAXIMUMNUM(Op.getNode(), DAG);
+
+ if (VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v16f16 ||
+ VT == MVT::v16bf16)
+ return splitBinaryVectorOp(Op, DAG);
+ return Op;
+}
+
SDValue SITargetLowering::lowerFMINIMUM_FMAXIMUM(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index c42366a1c04c8..532d1e46714e6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -146,6 +146,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
/// Custom lowering for ISD::FP_ROUND for MVT::f16.
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFMINIMUMNUM_FMAXIMUMNUM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFMINIMUM_FMAXIMUM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
SDValue promoteUniformOpToI32(SDValue Op, DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 2e2913d88cc54..0cb3ba38e8016 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1376,6 +1376,52 @@ def : GCNPat <
(i32 (V_MOV_B32_e32 (i32 0))), sub1)
>;
+
+
+class FPBinOpPat <SDPatternOperator node, ValueType vt, Instruction inst>
+ : GCNPat <(vt (node (vt (VOP3Mods vt:$src0, i32:$src0_mods)),
+ (vt (VOP3Mods vt:$src1, i32:$src1_mods)))),
+ (inst $src0_mods, $src0, $src1_mods, $src1, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+class FPPkBinOpPat <SDPatternOperator node, ValueType vt, Instruction inst>
+ : GCNPat <(vt (node (VOP3PMods v2f16:$src0, i32:$src0_mods),
+ (VOP3PMods v2f16:$src1, i32:$src1_mods))),
+ (inst $src0_mods, $src0, $src1_mods, $src1, DSTCLAMP.NONE)
+>;
+
+/// With IEEE=0, signalingness is ignored and the non-nan input will
+/// be directly returned.
+let OtherPredicates = [IEEEModeDisabled] in {
+ def : FPBinOpPat<fminimumnum, f32, V_MIN_F32_e64>;
+ def : FPBinOpPat<fmaximumnum, f32, V_MAX_F32_e64>;
+ def : FPBinOpPat<fminimumnum, f64, V_MIN_F64_e64>;
+ def : FPBinOpPat<fmaximumnum, f64, V_MAX_F64_e64>;
+
+ let SubtargetPredicate = Has16BitInsts,
+ True16Predicate = NotHasTrue16BitInsts in {
+ def : FPBinOpPat<fminimumnum, f16, V_MIN_F16_e64>;
+ def : FPBinOpPat<fmaximumnum, f16, V_MAX_F16_e64>;
+ }
+
+ let SubtargetPredicate = Has16BitInsts,
+ True16Predicate = UseRealTrue16Insts in {
+ def : FPBinOpPat<fminimumnum, f16, V_MIN_F16_t16_e64>;
+ def : FPBinOpPat<fmaximumnum, f16, V_MAX_F16_t16_e64>;
+ }
+
+ let SubtargetPredicate = Has16BitInsts,
+ True16Predicate = UseFakeTrue16Insts in {
+ def : FPBinOpPat<fminimumnum, f16, V_MIN_F16_fake16_e64>;
+ def : FPBinOpPat<fmaximumnum, f16, V_MAX_F16_fake16_e64>;
+ }
+
+ let SubtargetPredicate = HasVOP3PInsts in {
+ def : FPPkBinOpPat<fminimumnum, v2f16, V_PK_MIN_F16>;
+ def : FPPkBinOpPat<fmaximumnum, v2f16, V_PK_MAX_F16>;
+ }
+}
+
/********** ============================================ **********/
/********** Extraction, Insertion, Building and Casting **********/
/********** ============================================ **********/
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 46da9d33639b6..86e73ed03f187 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -2019,9 +2019,7 @@ define float @v_fneg_minimumnum_f32_no_ieee(float %a, float %b) #4 {
; GCN-LABEL: v_fneg_minimumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT: v_max_f32_e32 v0, v0, v1
+; GCN-NEXT: v_max_f32_e64 v0, -v0, -v1
; GCN-NEXT: s_setpc_b64 s[30:31]
%min = call float @llvm.minimumnum.f32(float %a, float %b)
%fneg = fneg float %min
@@ -2044,8 +2042,7 @@ define float @v_fneg_self_minimumnum_f32_no_ieee(float %a) #4 {
; GCN-LABEL: v_fneg_self_minimumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT: v_max_f32_e32 v0, v0, v0
+; GCN-NEXT: v_max_f32_e64 v0, -v0, -v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%min = call float @llvm.minimumnum.f32(float %a, float %a)
%min.fneg = fneg float %min
@@ -2068,8 +2065,7 @@ define float @v_fneg_posk_minimumnum_f32_no_ieee(float %a) #4 {
; GCN-LABEL: v_fneg_posk_minimumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT: v_max_f32_e32 v0, -4.0, v0
+; GCN-NEXT: v_max_f32_e64 v0, -v0, -4.0
; GCN-NEXT: s_setpc_b64 s[30:31]
%min = call float @llvm.minimumnum.f32(float 4.0, float %a)
%fneg = fneg float %min
@@ -2092,8 +2088,7 @@ define float @v_fneg_negk_minimumnum_f32_no_ieee(float %a) #4 {
; GCN-LABEL: v_fneg_negk_minimumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT: v_max_f32_e32 v0, 4.0, v0
+; GCN-NEXT: v_max_f32_e64 v0, -v0, 4.0
; GCN-NEXT: s_setpc_b64 s[30:31]
%min = call float @llvm.minimumnum.f32(float -4.0, float %a)
%fneg = fneg float %min
@@ -2251,8 +2246,7 @@ define float @v_fneg_neg0_minimumnum_f32_no_ieee(float %a) #4 {
; GCN-LABEL: v_fneg_neg0_minimumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT: v_max_f32_e32 v0, 0, v0
+; GCN-NEXT: v_max_f32_e64 v0, -v0, 0
; GCN-NEXT: s_setpc_b64 s[30:31]
%min = call float @llvm.minimumnum.f32(float -0.0, float %a)
%fneg = fneg float %min
@@ -2299,7 +2293,6 @@ define float @v_fneg_0_minimumnum_foldable_use_f32_no_ieee(float %a, float %b) #
; GCN-LABEL: v_fneg_0_minimumnum_foldable_use_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_min_f32_e32 v0, 0, v0
; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -2330,9 +2323,7 @@ define <2 x float> @v_fneg_minimumnum_multi_use_minimumnum_f32_no_ieee(float %a,
; GCN-LABEL: v_fneg_minimumnum_multi_use_minimumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT: v_max_f32_e32 v0, v0, v1
+; GCN-NEXT: v_max_f32_e64 v0, -v0, -v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%min = call float @llvm.minimumnum.f32(float %a, float %b)
@@ -2364,9 +2355,7 @@ define float @v_fneg_maximumnum_f32_no_ieee(float %a, float %b) #4 {
; GCN-LABEL: v_fneg_maximumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT: v_min_f32_e32 v0, v0, v1
+; GCN-NEXT: v_min_f32_e64 v0, -v0, -v1
; GCN-NEXT: s_setpc_b64 s[30:31]
%max = call float @llvm.maximumnum.f32(float %a, float %b)
%fneg = fneg float %max
@@ -2389,8 +2378,7 @@ define float @v_fneg_self_maximumnum_f32_no_ieee(float %a) #4 {
; GCN-LABEL: v_fneg_self_maximumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT: v_min_f32_e32 v0, v0, v0
+; GCN-NEXT: v_min_f32_e64 v0, -v0, -v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%max = call float @llvm.maximumnum.f32(float %a, float %a)
%max.fneg = fneg float %max
@@ -2413,8 +2401,7 @@ define float @v_fneg_posk_maximumnum_f32_no_ieee(float %a) #4 {
; GCN-LABEL: v_fneg_posk_maximumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT: v_min_f32_e32 v0, -4.0, v0
+; GCN-NEXT: v_min_f32_e64 v0, -v0, -4.0
; GCN-NEXT: s_setpc_b64 s[30:31]
%max = call float @llvm.maximumnum.f32(float 4.0, float %a)
%fneg = fneg float %max
@@ -2437,8 +2424,7 @@ define float @v_fneg_negk_maximumnum_f32_no_ieee(float %a) #4 {
; GCN-LABEL: v_fneg_negk_maximumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT: v_min_f32_e32 v0, 4.0, v0
+; GCN-NEXT: v_min_f32_e64 v0, -v0, 4.0
; GCN-NEXT: s_setpc_b64 s[30:31]
%max = call float @llvm.maximumnum.f32(float -4.0, float %a)
%fneg = fneg float %max
@@ -2473,8 +2459,7 @@ define float @v_fneg_neg0_maximumnum_f32_no_ieee(float %a) #4 {
; GCN-LABEL: v_fneg_neg0_maximumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT: v_min_f32_e32 v0, 0, v0
+; GCN-NEXT: v_min_f32_e64 v0, -v0, 0
; GCN-NEXT: s_setpc_b64 s[30:31]
%max = call float @llvm.maximumnum.f32(float -0.0, float %a)
%fneg = fneg float %max
@@ -2499,7 +2484,6 @@ define float @v_fneg_0_maximumnum_foldable_use_f32_no_ieee(float %a, float %b) #
; GCN-LABEL: v_fneg_0_maximumnum_foldable_use_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_max_f32_e32 v0, 0, v0
; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -2530,9 +2514,7 @@ define <2 x float> @v_fneg_maximumnum_multi_use_maximumnum_f32_no_ieee(float %a,
; GCN-LABEL: v_fneg_maximumnum_multi_use_maximumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT: v_min_f32_e32 v0, v0, v1
+; GCN-NEXT: v_min_f32_e64 v0, -v0, -v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%max = call float @llvm.maximumnum.f32(float %a, float %b)
diff --git a/llvm/test/CodeGen/AMDGPU/maximumnum.ll b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
index c45d86ce306e7..4f73e8e9c1883 100644
--- a/llvm/test/CodeGen/AMDGPU/maximumnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
@@ -3414,8 +3414,8 @@ define <2 x half> @v_maximumnum_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v0, v0
-; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-GISEL-NEXT: v_max_f16_e32 v3, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v2, v3
; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
@@ -3652,57 +3652,57 @@ define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) {
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v0
-; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-GISEL-NEXT: v_max_f16_e32 v5, v2, v2
+; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX8-GISEL-NEXT: v_max_f16_e32 v3, v3, v3
; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v4, v5
; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v3
+; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v3, v3
+; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v2
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-SDAG-LABEL: v_maximumnum_v3f16:
-; GFX900-SDAG: ; %bb.0:
-; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v3, v3
-; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v2
-; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_maximumnum_v3f16:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX9-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX9-SDAG-NEXT: s_setpc_b64 s[...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/141903
More information about the llvm-commits
mailing list