[llvm] [DAG] isKnownNeverNaN - add DemandedElts element mask to isKnownNeverNaN calls (PR #135952)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 16 07:44:27 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
This matches what we've already done for computeKnownBits etc., and improves vector handling by only inspecting the demanded elements.
---
Full diff: https://github.com/llvm/llvm-project/pull/135952.diff
9 Files Affected:
- (modified) llvm/include/llvm/CodeGen/SelectionDAG.h (+14)
- (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+1)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+60-23)
- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+3-4)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h (+2-3)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+3-2)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.h (+2-3)
- (modified) llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll (+16-17)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 63423463eeee2..2ab6b4d3027e9 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -2142,11 +2142,25 @@ class SelectionDAG {
/// X|Cst == X+Cst iff X&Cst = 0.
bool isBaseWithConstantOffset(SDValue Op) const;
+ /// Test whether the given SDValue (or all elements of it, if it is a
+ /// vector) is known to never be NaN in \p DemandedElts. If \p SNaN is true,
+ /// returns if \p Op is known to never be a signaling NaN (it may still be a
+ /// qNaN).
+ bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN = false,
+ unsigned Depth = 0) const;
+
/// Test whether the given SDValue (or all elements of it, if it is a
/// vector) is known to never be NaN. If \p SNaN is true, returns if \p Op is
/// known to never be a signaling NaN (it may still be a qNaN).
bool isKnownNeverNaN(SDValue Op, bool SNaN = false, unsigned Depth = 0) const;
+ /// \returns true if \p Op is known to never be a signaling NaN in \p
+ /// DemandedElts.
+ bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth = 0) const {
+ return isKnownNeverNaN(Op, DemandedElts, true, Depth);
+ }
+
/// \returns true if \p Op is known to never be a signaling NaN.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth = 0) const {
return isKnownNeverNaN(Op, true, Depth);
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 0a36975f4f625..00c36266a069f 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4283,6 +4283,7 @@ class TargetLowering : public TargetLoweringBase {
/// NaN. If \p sNaN is true, returns if \p Op is known to never be a signaling
/// NaN.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
bool SNaN = false,
unsigned Depth = 0) const;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 8682c40898046..64def735afa78 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5601,7 +5601,22 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
(Op.getOpcode() == ISD::ADD || isADDLike(Op));
}
-bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const {
+bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
+
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts = VT.isFixedLengthVector()
+ ? APInt::getAllOnes(VT.getVectorNumElements())
+ : APInt(1, 1);
+
+ return isKnownNeverNaN(Op, DemandedElts, SNaN, Depth);
+}
+
+bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
+ bool SNaN, unsigned Depth) const {
// If we're told that NaNs won't happen, assume they won't.
if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
return true;
@@ -5615,6 +5630,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
(SNaN && !C->getValueAPF().isSignaling());
}
+ if (!DemandedElts)
+ return false; // No demanded elts, better to assume we don't know anything.
+
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case ISD::FADD:
@@ -5657,21 +5675,21 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FLDEXP: {
if (SNaN)
return true;
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1);
}
case ISD::FABS:
case ISD::FNEG:
case ISD::FCOPYSIGN: {
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1);
}
case ISD::SELECT:
- return isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
- isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(1), DemandedElts, SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(2), DemandedElts, SNaN, Depth + 1);
case ISD::FP_EXTEND:
case ISD::FP_ROUND: {
if (SNaN)
return true;
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1);
}
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
@@ -5693,8 +5711,8 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FMAXIMUMNUM: {
// Only one needs to be known not-nan, since it will be returned if the
// other ends up being one.
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) ||
- isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1) ||
+ isKnownNeverNaN(Op.getOperand(1), DemandedElts, SNaN, Depth + 1);
}
case ISD::FMINNUM_IEEE:
case ISD::FMAXNUM_IEEE: {
@@ -5702,33 +5720,52 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
return true;
// This can return a NaN if either operand is an sNaN, or if both operands
// are NaN.
- return (isKnownNeverNaN(Op.getOperand(0), false, Depth + 1) &&
- isKnownNeverSNaN(Op.getOperand(1), Depth + 1)) ||
- (isKnownNeverNaN(Op.getOperand(1), false, Depth + 1) &&
- isKnownNeverSNaN(Op.getOperand(0), Depth + 1));
+ return (isKnownNeverNaN(Op.getOperand(0), DemandedElts, false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(1), DemandedElts, Depth + 1)) ||
+ (isKnownNeverNaN(Op.getOperand(1), DemandedElts, false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(0), DemandedElts, Depth + 1));
}
case ISD::FMINIMUM:
case ISD::FMAXIMUM: {
// TODO: Does this quiet or return the origina NaN as-is?
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
- isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(1), DemandedElts, SNaN, Depth + 1);
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue Src = Op.getOperand(0);
+ auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ EVT SrcVT = Src.getValueType();
+ if (SrcVT.isFixedLengthVector() && Idx &&
+ Idx->getAPIntValue().ult(SrcVT.getVectorNumElements())) {
+ APInt DemandedSrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
+ Idx->getZExtValue());
+ return isKnownNeverNaN(Src, DemandedSrcElts, SNaN, Depth + 1);
+ }
+ return isKnownNeverNaN(Src, SNaN, Depth + 1);
}
- case ISD::EXTRACT_VECTOR_ELT:
case ISD::EXTRACT_SUBVECTOR: {
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType().isFixedLengthVector()) {
+ unsigned Idx = Op.getConstantOperandVal(1);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
+ return isKnownNeverNaN(Src, DemandedSrcElts, SNaN, Depth + 1);
+ }
+ return isKnownNeverNaN(Src, SNaN, Depth + 1);
}
case ISD::BUILD_VECTOR: {
- for (const SDValue &Opnd : Op->ops())
- if (!isKnownNeverNaN(Opnd, SNaN, Depth + 1))
+ unsigned NumElts = Op.getNumOperands();
+ for (unsigned I = 0; I != NumElts; ++I)
+ if (DemandedElts[I] &&
+ !isKnownNeverNaN(Op.getOperand(I), SNaN, Depth + 1))
return false;
return true;
}
default:
- if (Opcode >= ISD::BUILTIN_OP_END ||
- Opcode == ISD::INTRINSIC_WO_CHAIN ||
- Opcode == ISD::INTRINSIC_W_CHAIN ||
- Opcode == ISD::INTRINSIC_VOID) {
- return TLI->isKnownNeverNaNForTargetNode(Op, *this, SNaN, Depth);
+ if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) {
+ return TLI->isKnownNeverNaNForTargetNode(Op, DemandedElts, *this, SNaN,
+ Depth);
}
return false;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5308593b5c988..3995216e3d689 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3928,6 +3928,7 @@ bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
}
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
bool SNaN,
unsigned Depth) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 533ad349f7500..2846405a2538c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -5971,10 +5971,9 @@ unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr(
}
}
-bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
- const SelectionDAG &DAG,
- bool SNaN,
- unsigned Depth) const {
+bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(
+ SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN,
+ unsigned Depth) const {
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case AMDGPUISD::FMIN_LEGACY:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 6705f86e15fc2..fa9d61ec37c24 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -321,9 +321,8 @@ class AMDGPUTargetLowering : public TargetLowering {
const MachineRegisterInfo &MRI,
unsigned Depth = 0) const override;
- bool isKnownNeverNaNForTargetNode(SDValue Op,
- const SelectionDAG &DAG,
- bool SNaN = false,
+ bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
+ const SelectionDAG &DAG, bool SNaN = false,
unsigned Depth = 0) const override;
bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index bd95bcd89e183..9181d03f9f593 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16687,6 +16687,7 @@ bool SITargetLowering::denormalsEnabledForType(
}
bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
bool SNaN,
unsigned Depth) const {
@@ -16699,8 +16700,8 @@ bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
}
- return AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(Op, DAG, SNaN,
- Depth);
+ return AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(Op, DemandedElts,
+ DAG, SNaN, Depth);
}
// On older subtargets, global FP atomic instructions have a hardcoded FP mode
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index dc0634331caf9..c42366a1c04c8 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -546,9 +546,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
bool isProfitableToHoist(Instruction *I) const override;
- bool isKnownNeverNaNForTargetNode(SDValue Op,
- const SelectionDAG &DAG,
- bool SNaN = false,
+ bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
+ const SelectionDAG &DAG, bool SNaN = false,
unsigned Depth = 0) const override;
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
index beac41e42e0c6..ef325da272005 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -1057,54 +1057,53 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v2, v3, v5, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v0.l, 0
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v4.l
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v1.l, 0
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v3, v5, v6 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v2, v2 clamp
; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX1100-FAKE16: ; %bb.0:
; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX1100-FAKE16-NEXT: v_pack_b32_f16 v1, v1, 0
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v0, v6, v6 clamp
; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX900: ; %bb.0:
; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, 0
-; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT: v_pk_max_f16 v0, v6, v6 clamp
+; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX906: ; %bb.0:
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, 0
-; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT: v_pk_max_f16 v0, v6, v6 clamp
+; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
``````````
</details>
https://github.com/llvm/llvm-project/pull/135952
More information about the llvm-commits mailing list