[llvm-branch-commits] [llvm] [AMDGPU][DAGCombiner][GlobalISel] Extend allMulUsesCanBeContracted with FNEG pattern (PR #188115)
Adel Ejjeh via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Mar 30 09:54:26 PDT 2026
https://github.com/adelejjeh updated https://github.com/llvm/llvm-project/pull/188115
>From 0210ce33ebf52cc9b9d7499aa4d401eb0b1a7638 Mon Sep 17 00:00:00 2001
From: Adel Ejjeh <adel.ejjeh at amd.com>
Date: Thu, 5 Mar 2026 12:18:16 -0600
Subject: [PATCH] [AMDGPU][DAGCombiner][GlobalISel] Extend
allMulUsesCanBeContracted with FNEG pattern
Extend allMulUsesCanBeContracted() to recognize fmul -> fneg -> fsub
chains as contractable uses. This allows FMA contraction when a multiply
feeds an fneg that is only used by fsub operations.
Changes:
- DAGCombiner.cpp: Add ISD::FNEG case to allMulUsesCanBeContracted()
checking that all FNEG users are ISD::FSUB. Update 1 fold site guard
in visitFSUBForFMACombine (fsub(fneg(fmul))).
- CombinerHelper.cpp: Add G_FNEG case to allMulUsesCanBeContracted()
checking that all FNEG users are G_FSUB. Update 2 fold site guards
in matchCombineFSubFNegFMulToFMadOrFMA. Fix guard ordering to check
isContractableFMul before allMulUsesCanBeContracted (cheap first).
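- Update the 7 FNEG test functions in fma-multiple-uses-contraction.ll
(covering fneg single-use, multi-use, mixed contractable/non-contractable,
and cross-pattern (P1 direct + P2 fneg) interactions): regenerate their
CHECK lines and drop the note saying fneg support was not yet implemented.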
- Update mad-combine.ll CHECK lines affected by the guard changes.
Note: FADD is intentionally not checked as an FNEG user because
fadd(fneg(x), y) is canonicalized to fsub(y, x) before the FMA combine
runs in both SDAG (visitFADD) and GISel (redundant_neg_operands).
FPEXT inside FNEG chains is deferred to a follow-up patch.
Made-with: Cursor
---
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 40 +-
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 24 +-
.../AMDGPU/fma-multiple-uses-contraction.ll | 762 +++++++++---------
llvm/test/CodeGen/AMDGPU/mad-combine.ll | 9 +-
4 files changed, 421 insertions(+), 414 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 297d6edac2e5c..d2bf2568df276 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6316,8 +6316,9 @@ static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
/// would duplicate the multiply without reducing the total number of
/// operations.
///
-/// Currently checks for the following pattern:
+/// Currently checks for the following patterns:
/// - fmul --> fadd/fsub: Direct contraction
+/// - fmul --> fneg --> fsub: Contraction through fneg
bool CombinerHelper::allMulUsesCanBeContracted(const MachineInstr &MI) const {
Register MulReg = MI.getOperand(0).getReg();
@@ -6328,6 +6329,17 @@ bool CombinerHelper::allMulUsesCanBeContracted(const MachineInstr &MI) const {
if (Opcode == TargetOpcode::G_FADD || Opcode == TargetOpcode::G_FSUB)
continue;
+ // G_FNEG use - contractable if all users of the fneg are G_FSUB.
+ if (Opcode == TargetOpcode::G_FNEG) {
+ Register FNegReg = UseMI.getOperand(0).getReg();
+ for (const MachineInstr &FNegUser : MRI.use_nodbg_instructions(FNegReg)) {
+ unsigned FNegUserOp = FNegUser.getOpcode();
+ if (FNegUserOp != TargetOpcode::G_FSUB)
+ return false;
+ }
+ continue;
+ }
+
// Any other use type is not currently recognized as contractable.
return false;
}
@@ -6751,10 +6763,15 @@ bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
MachineInstr *FMulMI;
// fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+ // Only contract if both fneg and fmul have one use (both eliminated), or
+ // under Aggressive mode if all uses of the multiply are contractable
+ // (including through fneg -> fsub chains), avoiding duplication of the
+ // multiply without reducing total operations.
if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
- (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
- MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
- isContractableFMul(*FMulMI, AllowFusionGlobally)) {
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ ((MRI.hasOneNonDBGUse(LHSReg) &&
+ MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg())) ||
+ (Aggressive && allMulUsesCanBeContracted(*FMulMI)))) {
MatchInfo = [=, &MI](MachineIRBuilder &B) {
Register NegX =
B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
@@ -6766,10 +6783,19 @@ bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
}
// fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
+ // Note: In the standard combiner ordering, redundant_neg_operands
+ // canonicalizes fsub(x, fneg(y)) -> fadd(x, y) before fma_combines runs,
+ // so this fold may not fire in practice. It is kept as defensive code
+ // against combiner reordering.
+ // Only contract if both fneg and fmul have one use (both eliminated), or
+ // under Aggressive mode if all uses of the multiply are contractable
+ // (including through fneg -> fsub chains), avoiding duplication of the
+ // multiply without reducing total operations.
if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
- (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
- MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
- isContractableFMul(*FMulMI, AllowFusionGlobally)) {
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ ((MRI.hasOneNonDBGUse(RHSReg) &&
+ MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg())) ||
+ (Aggressive && allMulUsesCanBeContracted(*FMulMI)))) {
MatchInfo = [=, &MI](MachineIRBuilder &B) {
B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
{FMulMI->getOperand(1).getReg(),
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4e1fab5391e87..ffcdb9c40bb81 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17685,8 +17685,9 @@ static bool isFusedOp(const MatchContextClass &Matcher, SDValue N) {
/// would duplicate the multiply without reducing the total number of
/// operations.
///
-/// Currently checks for the following pattern:
+/// Currently checks for the following patterns:
/// - fmul --> fadd/fsub: Direct contraction
+/// - fmul --> fneg --> fsub: Contraction through fneg
static bool allMulUsesCanBeContracted(SDValue Mul) {
for (const auto *User : Mul->users()) {
unsigned Opcode = User->getOpcode();
@@ -17695,6 +17696,16 @@ static bool allMulUsesCanBeContracted(SDValue Mul) {
if (Opcode == ISD::FADD || Opcode == ISD::FSUB)
continue;
+ // FNEG use - contractable if all users of the fneg are FSUB.
+ if (Opcode == ISD::FNEG) {
+ for (const auto *FNegUser : User->users()) {
+ unsigned FNegUserOp = FNegUser->getOpcode();
+ if (FNegUserOp != ISD::FSUB)
+ return false;
+ }
+ continue;
+ }
+
// Any other use type is not currently recognized as contractable.
return false;
}
@@ -18038,8 +18049,17 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
+ // Note: SDAG does not need the symmetric fold (fsub x, (fneg (fmul y, z)))
+ // because visitFSUB canonicalizes fsub(A, fneg(B)) -> fadd(A, B) before
+ // calling visitFSUBForFMACombine, so that pattern is handled by
+ // visitFADDForFMACombine instead.
+ // Only contract if both the fneg and the fmul have one use (so both are
+ // eliminated), or under Aggressive mode if all uses of the multiply are
+ // contractable (including through fneg -> fsub chains), avoiding duplication
+ // of the multiply without reducing total operations.
if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
- (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
+ ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) ||
+ (Aggressive && allMulUsesCanBeContracted(N0.getOperand(0))))) {
SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);
return matcher.getNode(PreferredFusedOpcode, SL, VT,
diff --git a/llvm/test/CodeGen/AMDGPU/fma-multiple-uses-contraction.ll b/llvm/test/CodeGen/AMDGPU/fma-multiple-uses-contraction.ll
index 803561b2b29a6..99dabe9961033 100644
--- a/llvm/test/CodeGen/AMDGPU/fma-multiple-uses-contraction.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma-multiple-uses-contraction.ll
@@ -714,15 +714,9 @@ define { float, float, float } @mul_three_contractable_uses(float %a, float %b,
ret { float, float, float } %ret2
}
-
; ==========================================================================
; FNEG patterns
; Tests for allMulUsesCanBeContracted recognizing fneg as a transparent user.
-;
-; NOTE: The allMulUsesCanBeContracted guard does not yet recognize fneg as
-; transparent. That support is added by the next patch in the series. Until
-; then, the CHECK lines below reflect current (potentially over-conservative)
-; codegen and may not match the "Expected:" comments on individual tests.
; ==========================================================================
; Test case: fmul -> fneg -> fsub (single use chain).
@@ -731,55 +725,55 @@ define { float, float, float } @mul_three_contractable_uses(float %a, float %b,
; Should contract -- single-use chain, fneg folds into fma.
; Expected: single fma/mad, no v_mul.
define float @mul_fneg_fsub_single_use(float %a, float %b, float %c) {
-; P0-GFX9-SDAG-F32FLUSH-LABEL: mul_fneg_fsub_single_use:
-; P0-GFX9-SDAG-F32FLUSH: ; %bb.0:
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v0, v0, -v1, -v2
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-F32FLUSH-LABEL: mul_fneg_fsub_single_use:
+; GFX9-SDAG-F32FLUSH: ; %bb.0:
+; GFX9-SDAG-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v0, v0, -v1, -v2
+; GFX9-SDAG-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9-GISEL-F32FLUSH-LABEL: mul_fneg_fsub_single_use:
-; P0-GFX9-GISEL-F32FLUSH: ; %bb.0:
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v0, v0, -v1, -v2
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-F32FLUSH-LABEL: mul_fneg_fsub_single_use:
+; GFX9-GISEL-F32FLUSH: ; %bb.0:
+; GFX9-GISEL-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v0, v0, -v1, -v2
+; GFX9-GISEL-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9_4-SDAG-LABEL: mul_fneg_fsub_single_use:
-; P0-GFX9_4-SDAG: ; %bb.0:
-; P0-GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-SDAG-NEXT: v_fma_f32 v0, -v0, v1, -v2
-; P0-GFX9_4-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9_4-SDAG-LABEL: mul_fneg_fsub_single_use:
+; GFX9_4-SDAG: ; %bb.0:
+; GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9_4-SDAG-NEXT: v_fma_f32 v0, -v0, v1, -v2
+; GFX9_4-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9_4-GISEL-LABEL: mul_fneg_fsub_single_use:
-; P0-GFX9_4-GISEL: ; %bb.0:
-; P0-GFX9_4-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-GISEL-NEXT: v_fma_f32 v0, v0, -v1, -v2
-; P0-GFX9_4-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9_4-GISEL-LABEL: mul_fneg_fsub_single_use:
+; GFX9_4-GISEL: ; %bb.0:
+; GFX9_4-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9_4-GISEL-NEXT: v_fma_f32 v0, v0, -v1, -v2
+; GFX9_4-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX12_5-SDAG-LABEL: mul_fneg_fsub_single_use:
-; P0-GFX12_5-SDAG: ; %bb.0:
-; P0-GFX12_5-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; P0-GFX12_5-SDAG-NEXT: s_wait_kmcnt 0x0
-; P0-GFX12_5-SDAG-NEXT: v_fma_f32 v0, -v0, v1, -v2
-; P0-GFX12_5-SDAG-NEXT: s_set_pc_i64 s[30:31]
+; GFX12_5-SDAG-LABEL: mul_fneg_fsub_single_use:
+; GFX12_5-SDAG: ; %bb.0:
+; GFX12_5-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12_5-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12_5-SDAG-NEXT: v_fma_f32 v0, -v0, v1, -v2
+; GFX12_5-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
-; P0-GFX12_5-GISEL-LABEL: mul_fneg_fsub_single_use:
-; P0-GFX12_5-GISEL: ; %bb.0:
-; P0-GFX12_5-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; P0-GFX12_5-GISEL-NEXT: s_wait_kmcnt 0x0
-; P0-GFX12_5-GISEL-NEXT: v_fma_f32 v0, v0, -v1, -v2
-; P0-GFX12_5-GISEL-NEXT: s_set_pc_i64 s[30:31]
+; GFX12_5-GISEL-LABEL: mul_fneg_fsub_single_use:
+; GFX12_5-GISEL: ; %bb.0:
+; GFX12_5-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12_5-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12_5-GISEL-NEXT: v_fma_f32 v0, v0, -v1, -v2
+; GFX12_5-GISEL-NEXT: s_set_pc_i64 s[30:31]
;
-; P0-GFX9-SDAG-F32DENORM-LABEL: mul_fneg_fsub_single_use:
-; P0-GFX9-SDAG-F32DENORM: ; %bb.0:
-; P0-GFX9-SDAG-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v0, -v0, v1, -v2
-; P0-GFX9-SDAG-F32DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-F32DENORM-LABEL: mul_fneg_fsub_single_use:
+; GFX9-SDAG-F32DENORM: ; %bb.0:
+; GFX9-SDAG-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v0, -v0, v1, -v2
+; GFX9-SDAG-F32DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9-GISEL-F32DENORM-LABEL: mul_fneg_fsub_single_use:
-; P0-GFX9-GISEL-F32DENORM: ; %bb.0:
-; P0-GFX9-GISEL-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v0, v0, -v1, -v2
-; P0-GFX9-GISEL-F32DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-F32DENORM-LABEL: mul_fneg_fsub_single_use:
+; GFX9-GISEL-F32DENORM: ; %bb.0:
+; GFX9-GISEL-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v0, v0, -v1, -v2
+; GFX9-GISEL-F32DENORM-NEXT: s_setpc_b64 s[30:31]
%mul = fmul contract float %a, %b
%neg = fneg contract float %mul
%sub = fsub contract float %neg, %c ; contractable
@@ -792,71 +786,71 @@ define float @mul_fneg_fsub_single_use(float %a, float %b, float %c) {
; Should contract -- all fneg uses are contractable fsubs.
; Expected: two fma/mad instructions, no v_mul.
define { float, float } @mul_fneg_multiple_fsub_uses(float %a, float %b, float %c, float %d) {
-; P0-GFX9-SDAG-F32FLUSH-LABEL: mul_fneg_multiple_fsub_uses:
-; P0-GFX9-SDAG-F32FLUSH: ; %bb.0:
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v2, v0, -v1, -v2
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v1, v0, -v1, -v3
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-F32FLUSH-LABEL: mul_fneg_multiple_fsub_uses:
+; GFX9-SDAG-F32FLUSH: ; %bb.0:
+; GFX9-SDAG-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v2, v0, -v1, -v2
+; GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v1, v0, -v1, -v3
+; GFX9-SDAG-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-SDAG-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9-GISEL-F32FLUSH-LABEL: mul_fneg_multiple_fsub_uses:
-; P0-GFX9-GISEL-F32FLUSH: ; %bb.0:
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v2, v0, -v1, -v2
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v1, v0, -v1, -v3
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-F32FLUSH-LABEL: mul_fneg_multiple_fsub_uses:
+; GFX9-GISEL-F32FLUSH: ; %bb.0:
+; GFX9-GISEL-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v2, v0, -v1, -v2
+; GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v1, v0, -v1, -v3
+; GFX9-GISEL-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-GISEL-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9_4-SDAG-LABEL: mul_fneg_multiple_fsub_uses:
-; P0-GFX9_4-SDAG: ; %bb.0:
-; P0-GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-SDAG-NEXT: v_fma_f32 v2, -v0, v1, -v2
-; P0-GFX9_4-SDAG-NEXT: v_fma_f32 v1, -v0, v1, -v3
-; P0-GFX9_4-SDAG-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9_4-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9_4-SDAG-LABEL: mul_fneg_multiple_fsub_uses:
+; GFX9_4-SDAG: ; %bb.0:
+; GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9_4-SDAG-NEXT: v_fma_f32 v2, -v0, v1, -v2
+; GFX9_4-SDAG-NEXT: v_fma_f32 v1, -v0, v1, -v3
+; GFX9_4-SDAG-NEXT: v_mov_b32_e32 v0, v2
+; GFX9_4-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9_4-GISEL-LABEL: mul_fneg_multiple_fsub_uses:
-; P0-GFX9_4-GISEL: ; %bb.0:
-; P0-GFX9_4-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-GISEL-NEXT: v_fma_f32 v2, v0, -v1, -v2
-; P0-GFX9_4-GISEL-NEXT: v_fma_f32 v1, v0, -v1, -v3
-; P0-GFX9_4-GISEL-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9_4-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9_4-GISEL-LABEL: mul_fneg_multiple_fsub_uses:
+; GFX9_4-GISEL: ; %bb.0:
+; GFX9_4-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9_4-GISEL-NEXT: v_fma_f32 v2, v0, -v1, -v2
+; GFX9_4-GISEL-NEXT: v_fma_f32 v1, v0, -v1, -v3
+; GFX9_4-GISEL-NEXT: v_mov_b32_e32 v0, v2
+; GFX9_4-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX12_5-SDAG-LABEL: mul_fneg_multiple_fsub_uses:
-; P0-GFX12_5-SDAG: ; %bb.0:
-; P0-GFX12_5-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; P0-GFX12_5-SDAG-NEXT: s_wait_kmcnt 0x0
-; P0-GFX12_5-SDAG-NEXT: v_dual_fma_f32 v2, -v0, v1, -v2 :: v_dual_fma_f32 v1, -v0, v1, -v3
-; P0-GFX12_5-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; P0-GFX12_5-SDAG-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX12_5-SDAG-NEXT: s_set_pc_i64 s[30:31]
+; GFX12_5-SDAG-LABEL: mul_fneg_multiple_fsub_uses:
+; GFX12_5-SDAG: ; %bb.0:
+; GFX12_5-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12_5-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12_5-SDAG-NEXT: v_dual_fma_f32 v2, -v0, v1, -v2 :: v_dual_fma_f32 v1, -v0, v1, -v3
+; GFX12_5-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12_5-SDAG-NEXT: v_mov_b32_e32 v0, v2
+; GFX12_5-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
-; P0-GFX12_5-GISEL-LABEL: mul_fneg_multiple_fsub_uses:
-; P0-GFX12_5-GISEL: ; %bb.0:
-; P0-GFX12_5-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; P0-GFX12_5-GISEL-NEXT: s_wait_kmcnt 0x0
-; P0-GFX12_5-GISEL-NEXT: v_dual_fma_f32 v2, v0, -v1, -v2 :: v_dual_fma_f32 v1, v0, -v1, -v3
-; P0-GFX12_5-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; P0-GFX12_5-GISEL-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX12_5-GISEL-NEXT: s_set_pc_i64 s[30:31]
+; GFX12_5-GISEL-LABEL: mul_fneg_multiple_fsub_uses:
+; GFX12_5-GISEL: ; %bb.0:
+; GFX12_5-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12_5-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12_5-GISEL-NEXT: v_dual_fma_f32 v2, v0, -v1, -v2 :: v_dual_fma_f32 v1, v0, -v1, -v3
+; GFX12_5-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12_5-GISEL-NEXT: v_mov_b32_e32 v0, v2
+; GFX12_5-GISEL-NEXT: s_set_pc_i64 s[30:31]
;
-; P0-GFX9-SDAG-F32DENORM-LABEL: mul_fneg_multiple_fsub_uses:
-; P0-GFX9-SDAG-F32DENORM: ; %bb.0:
-; P0-GFX9-SDAG-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v2, -v0, v1, -v2
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v1, -v0, v1, -v3
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-SDAG-F32DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-F32DENORM-LABEL: mul_fneg_multiple_fsub_uses:
+; GFX9-SDAG-F32DENORM: ; %bb.0:
+; GFX9-SDAG-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v2, -v0, v1, -v2
+; GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v1, -v0, v1, -v3
+; GFX9-SDAG-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-SDAG-F32DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9-GISEL-F32DENORM-LABEL: mul_fneg_multiple_fsub_uses:
-; P0-GFX9-GISEL-F32DENORM: ; %bb.0:
-; P0-GFX9-GISEL-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v2, v0, -v1, -v2
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v1, v0, -v1, -v3
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-GISEL-F32DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-F32DENORM-LABEL: mul_fneg_multiple_fsub_uses:
+; GFX9-GISEL-F32DENORM: ; %bb.0:
+; GFX9-GISEL-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v2, v0, -v1, -v2
+; GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v1, v0, -v1, -v3
+; GFX9-GISEL-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-GISEL-F32DENORM-NEXT: s_setpc_b64 s[30:31]
%mul = fmul contract float %a, %b
%neg = fneg contract float %mul
%sub1 = fsub contract float %neg, %c ; contractable
@@ -872,71 +866,71 @@ define { float, float } @mul_fneg_multiple_fsub_uses(float %a, float %b, float %
; Should contract -- both fneg uses (fsub, fadd) are contractable.
; Expected: two fma/mad instructions, no v_mul.
define { float, float } @mul_fneg_mixed_uses(float %a, float %b, float %c, float %d) {
-; P0-GFX9-SDAG-F32FLUSH-LABEL: mul_fneg_mixed_uses:
-; P0-GFX9-SDAG-F32FLUSH: ; %bb.0:
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v2, -v0, v1, -v2
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v1, -v0, v1, v3
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-F32FLUSH-LABEL: mul_fneg_mixed_uses:
+; GFX9-SDAG-F32FLUSH: ; %bb.0:
+; GFX9-SDAG-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v2, -v0, v1, -v2
+; GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v1, -v0, v1, v3
+; GFX9-SDAG-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-SDAG-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9-GISEL-F32FLUSH-LABEL: mul_fneg_mixed_uses:
-; P0-GFX9-GISEL-F32FLUSH: ; %bb.0:
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v2, v0, -v1, -v2
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v1, v0, -v1, v3
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-F32FLUSH-LABEL: mul_fneg_mixed_uses:
+; GFX9-GISEL-F32FLUSH: ; %bb.0:
+; GFX9-GISEL-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v2, v0, -v1, -v2
+; GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v1, v0, -v1, v3
+; GFX9-GISEL-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-GISEL-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9_4-SDAG-LABEL: mul_fneg_mixed_uses:
-; P0-GFX9_4-SDAG: ; %bb.0:
-; P0-GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-SDAG-NEXT: v_fma_f32 v2, -v0, v1, -v2
-; P0-GFX9_4-SDAG-NEXT: v_fma_f32 v1, -v0, v1, v3
-; P0-GFX9_4-SDAG-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9_4-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9_4-SDAG-LABEL: mul_fneg_mixed_uses:
+; GFX9_4-SDAG: ; %bb.0:
+; GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9_4-SDAG-NEXT: v_fma_f32 v2, -v0, v1, -v2
+; GFX9_4-SDAG-NEXT: v_fma_f32 v1, -v0, v1, v3
+; GFX9_4-SDAG-NEXT: v_mov_b32_e32 v0, v2
+; GFX9_4-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9_4-GISEL-LABEL: mul_fneg_mixed_uses:
-; P0-GFX9_4-GISEL: ; %bb.0:
-; P0-GFX9_4-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-GISEL-NEXT: v_fma_f32 v2, v0, -v1, -v2
-; P0-GFX9_4-GISEL-NEXT: v_fma_f32 v1, v0, -v1, v3
-; P0-GFX9_4-GISEL-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9_4-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9_4-GISEL-LABEL: mul_fneg_mixed_uses:
+; GFX9_4-GISEL: ; %bb.0:
+; GFX9_4-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9_4-GISEL-NEXT: v_fma_f32 v2, v0, -v1, -v2
+; GFX9_4-GISEL-NEXT: v_fma_f32 v1, v0, -v1, v3
+; GFX9_4-GISEL-NEXT: v_mov_b32_e32 v0, v2
+; GFX9_4-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX12_5-SDAG-LABEL: mul_fneg_mixed_uses:
-; P0-GFX12_5-SDAG: ; %bb.0:
-; P0-GFX12_5-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; P0-GFX12_5-SDAG-NEXT: s_wait_kmcnt 0x0
-; P0-GFX12_5-SDAG-NEXT: v_dual_fma_f32 v2, -v0, v1, -v2 :: v_dual_fma_f32 v1, -v0, v1, v3
-; P0-GFX12_5-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; P0-GFX12_5-SDAG-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX12_5-SDAG-NEXT: s_set_pc_i64 s[30:31]
+; GFX12_5-SDAG-LABEL: mul_fneg_mixed_uses:
+; GFX12_5-SDAG: ; %bb.0:
+; GFX12_5-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12_5-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12_5-SDAG-NEXT: v_dual_fma_f32 v2, -v0, v1, -v2 :: v_dual_fma_f32 v1, -v0, v1, v3
+; GFX12_5-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12_5-SDAG-NEXT: v_mov_b32_e32 v0, v2
+; GFX12_5-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
-; P0-GFX12_5-GISEL-LABEL: mul_fneg_mixed_uses:
-; P0-GFX12_5-GISEL: ; %bb.0:
-; P0-GFX12_5-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; P0-GFX12_5-GISEL-NEXT: s_wait_kmcnt 0x0
-; P0-GFX12_5-GISEL-NEXT: v_dual_fma_f32 v2, v0, -v1, -v2 :: v_dual_fma_f32 v1, v0, -v1, v3
-; P0-GFX12_5-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; P0-GFX12_5-GISEL-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX12_5-GISEL-NEXT: s_set_pc_i64 s[30:31]
+; GFX12_5-GISEL-LABEL: mul_fneg_mixed_uses:
+; GFX12_5-GISEL: ; %bb.0:
+; GFX12_5-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12_5-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12_5-GISEL-NEXT: v_dual_fma_f32 v2, v0, -v1, -v2 :: v_dual_fma_f32 v1, v0, -v1, v3
+; GFX12_5-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12_5-GISEL-NEXT: v_mov_b32_e32 v0, v2
+; GFX12_5-GISEL-NEXT: s_set_pc_i64 s[30:31]
;
-; P0-GFX9-SDAG-F32DENORM-LABEL: mul_fneg_mixed_uses:
-; P0-GFX9-SDAG-F32DENORM: ; %bb.0:
-; P0-GFX9-SDAG-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v2, -v0, v1, -v2
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v1, -v0, v1, v3
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-SDAG-F32DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-F32DENORM-LABEL: mul_fneg_mixed_uses:
+; GFX9-SDAG-F32DENORM: ; %bb.0:
+; GFX9-SDAG-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v2, -v0, v1, -v2
+; GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v1, -v0, v1, v3
+; GFX9-SDAG-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-SDAG-F32DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9-GISEL-F32DENORM-LABEL: mul_fneg_mixed_uses:
-; P0-GFX9-GISEL-F32DENORM: ; %bb.0:
-; P0-GFX9-GISEL-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v2, v0, -v1, -v2
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v1, v0, -v1, v3
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-GISEL-F32DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-F32DENORM-LABEL: mul_fneg_mixed_uses:
+; GFX9-GISEL-F32DENORM: ; %bb.0:
+; GFX9-GISEL-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v2, v0, -v1, -v2
+; GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v1, v0, -v1, v3
+; GFX9-GISEL-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-GISEL-F32DENORM-NEXT: s_setpc_b64 s[30:31]
%mul = fmul contract float %a, %b
%neg = fneg contract float %mul
%sub = fsub contract float %neg, %c ; contractable
@@ -952,71 +946,55 @@ define { float, float } @mul_fneg_mixed_uses(float %a, float %b, float %c, float
; Should NOT contract -- one fneg user (fmul) is not contractable.
; Expected: v_mul + v_mul, no fma contraction.
define { float, float } @mul_fneg_mixed_uses_2(float %a, float %b, float %c, float %d) {
-; P0-GFX9-SDAG-F32FLUSH-LABEL: mul_fneg_mixed_uses_2:
-; P0-GFX9-SDAG-F32FLUSH: ; %bb.0:
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mul_f32_e64 v4, v0, -v1
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v0, v0, -v1, -v2
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mul_f32_e32 v1, v4, v3
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
-;
-; P0-GFX9-GISEL-F32FLUSH-LABEL: mul_fneg_mixed_uses_2:
-; P0-GFX9-GISEL-F32FLUSH: ; %bb.0:
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mul_f32_e64 v4, v0, -v1
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v0, v0, -v1, -v2
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mul_f32_e32 v1, v4, v3
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
-;
-; P0-GFX9_4-SDAG-LABEL: mul_fneg_mixed_uses_2:
-; P0-GFX9_4-SDAG: ; %bb.0:
-; P0-GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-SDAG-NEXT: v_mul_f32_e64 v4, v0, -v1
-; P0-GFX9_4-SDAG-NEXT: v_fma_f32 v0, -v0, v1, -v2
-; P0-GFX9_4-SDAG-NEXT: v_mul_f32_e32 v1, v4, v3
-; P0-GFX9_4-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: mul_fneg_mixed_uses_2:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mul_f32_e64 v1, v0, -v1
+; GFX9-SDAG-NEXT: v_sub_f32_e32 v0, v1, v2
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9_4-GISEL-LABEL: mul_fneg_mixed_uses_2:
-; P0-GFX9_4-GISEL: ; %bb.0:
-; P0-GFX9_4-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-GISEL-NEXT: v_mul_f32_e64 v4, v0, -v1
-; P0-GFX9_4-GISEL-NEXT: v_fma_f32 v0, v0, -v1, -v2
-; P0-GFX9_4-GISEL-NEXT: v_mul_f32_e32 v1, v4, v3
-; P0-GFX9_4-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: mul_fneg_mixed_uses_2:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mul_f32_e64 v1, v0, -v1
+; GFX9-GISEL-NEXT: v_sub_f32_e32 v0, v1, v2
+; GFX9-GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX12_5-SDAG-LABEL: mul_fneg_mixed_uses_2:
-; P0-GFX12_5-SDAG: ; %bb.0:
-; P0-GFX12_5-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; P0-GFX12_5-SDAG-NEXT: s_wait_kmcnt 0x0
-; P0-GFX12_5-SDAG-NEXT: v_dual_mul_f32 v4, v0, -v1 :: v_dual_fma_f32 v0, -v0, v1, -v2
-; P0-GFX12_5-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; P0-GFX12_5-SDAG-NEXT: v_mul_f32_e32 v1, v4, v3
-; P0-GFX12_5-SDAG-NEXT: s_set_pc_i64 s[30:31]
+; GFX9_4-SDAG-LABEL: mul_fneg_mixed_uses_2:
+; GFX9_4-SDAG: ; %bb.0:
+; GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9_4-SDAG-NEXT: v_mul_f32_e64 v1, v0, -v1
+; GFX9_4-SDAG-NEXT: v_sub_f32_e32 v0, v1, v2
+; GFX9_4-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX9_4-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX12_5-GISEL-LABEL: mul_fneg_mixed_uses_2:
-; P0-GFX12_5-GISEL: ; %bb.0:
-; P0-GFX12_5-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; P0-GFX12_5-GISEL-NEXT: s_wait_kmcnt 0x0
-; P0-GFX12_5-GISEL-NEXT: v_dual_mul_f32 v4, v0, -v1 :: v_dual_fma_f32 v0, v0, -v1, -v2
-; P0-GFX12_5-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; P0-GFX12_5-GISEL-NEXT: v_mul_f32_e32 v1, v4, v3
-; P0-GFX12_5-GISEL-NEXT: s_set_pc_i64 s[30:31]
+; GFX9_4-GISEL-LABEL: mul_fneg_mixed_uses_2:
+; GFX9_4-GISEL: ; %bb.0:
+; GFX9_4-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9_4-GISEL-NEXT: v_mul_f32_e64 v1, v0, -v1
+; GFX9_4-GISEL-NEXT: v_sub_f32_e32 v0, v1, v2
+; GFX9_4-GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX9_4-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9-SDAG-F32DENORM-LABEL: mul_fneg_mixed_uses_2:
-; P0-GFX9-SDAG-F32DENORM: ; %bb.0:
-; P0-GFX9-SDAG-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_mul_f32_e64 v4, v0, -v1
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v0, -v0, v1, -v2
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_mul_f32_e32 v1, v4, v3
-; P0-GFX9-SDAG-F32DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX12_5-SDAG-LABEL: mul_fneg_mixed_uses_2:
+; GFX12_5-SDAG: ; %bb.0:
+; GFX12_5-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12_5-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12_5-SDAG-NEXT: v_mul_f32_e64 v1, v0, -v1
+; GFX12_5-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12_5-SDAG-NEXT: v_dual_sub_f32 v0, v1, v2 :: v_dual_mul_f32 v1, v1, v3
+; GFX12_5-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
-; P0-GFX9-GISEL-F32DENORM-LABEL: mul_fneg_mixed_uses_2:
-; P0-GFX9-GISEL-F32DENORM: ; %bb.0:
-; P0-GFX9-GISEL-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_mul_f32_e64 v4, v0, -v1
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v0, v0, -v1, -v2
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_mul_f32_e32 v1, v4, v3
-; P0-GFX9-GISEL-F32DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX12_5-GISEL-LABEL: mul_fneg_mixed_uses_2:
+; GFX12_5-GISEL: ; %bb.0:
+; GFX12_5-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12_5-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12_5-GISEL-NEXT: v_mul_f32_e64 v1, v0, -v1
+; GFX12_5-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12_5-GISEL-NEXT: v_dual_sub_f32 v0, v1, v2 :: v_dual_mul_f32 v1, v1, v3
+; GFX12_5-GISEL-NEXT: s_set_pc_i64 s[30:31]
%mul = fmul contract float %a, %b
%neg = fneg contract float %mul
%sub = fsub contract float %neg, %c ; contractable
@@ -1032,71 +1010,55 @@ define { float, float } @mul_fneg_mixed_uses_2(float %a, float %b, float %c, flo
; Should NOT contract -- one path (fneg -> fmul) is not contractable.
; Expected: v_mul shared by both paths, no fma contraction.
define { float, float } @mul_fneg_nonfsub_noncontractable(float %a, float %b, float %c, float %d) {
-; P0-GFX9-SDAG-F32FLUSH-LABEL: mul_fneg_nonfsub_noncontractable:
-; P0-GFX9-SDAG-F32FLUSH: ; %bb.0:
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mul_f32_e32 v4, v0, v1
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v0, v0, v1, v2
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mul_f32_e64 v1, -v4, v3
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
-;
-; P0-GFX9-GISEL-F32FLUSH-LABEL: mul_fneg_nonfsub_noncontractable:
-; P0-GFX9-GISEL-F32FLUSH: ; %bb.0:
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mul_f32_e64 v4, v0, -v1
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v0, v0, v1, v2
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mul_f32_e32 v1, v4, v3
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
-;
-; P0-GFX9_4-SDAG-LABEL: mul_fneg_nonfsub_noncontractable:
-; P0-GFX9_4-SDAG: ; %bb.0:
-; P0-GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-SDAG-NEXT: v_mul_f32_e64 v4, v0, -v1
-; P0-GFX9_4-SDAG-NEXT: v_fma_f32 v0, v0, v1, v2
-; P0-GFX9_4-SDAG-NEXT: v_mul_f32_e32 v1, v4, v3
-; P0-GFX9_4-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: mul_fneg_nonfsub_noncontractable:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v1, v0, v1
+; GFX9-SDAG-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX9-SDAG-NEXT: v_mul_f32_e64 v1, -v1, v3
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9_4-GISEL-LABEL: mul_fneg_nonfsub_noncontractable:
-; P0-GFX9_4-GISEL: ; %bb.0:
-; P0-GFX9_4-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-GISEL-NEXT: v_mul_f32_e64 v4, v0, -v1
-; P0-GFX9_4-GISEL-NEXT: v_fma_f32 v0, v0, v1, v2
-; P0-GFX9_4-GISEL-NEXT: v_mul_f32_e32 v1, v4, v3
-; P0-GFX9_4-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: mul_fneg_nonfsub_noncontractable:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mul_f32_e32 v1, v0, v1
+; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX9-GISEL-NEXT: v_mul_f32_e64 v1, -v1, v3
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX12_5-SDAG-LABEL: mul_fneg_nonfsub_noncontractable:
-; P0-GFX12_5-SDAG: ; %bb.0:
-; P0-GFX12_5-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; P0-GFX12_5-SDAG-NEXT: s_wait_kmcnt 0x0
-; P0-GFX12_5-SDAG-NEXT: v_dual_mul_f32 v4, v0, -v1 :: v_dual_fma_f32 v0, v0, v1, v2
-; P0-GFX12_5-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; P0-GFX12_5-SDAG-NEXT: v_mul_f32_e32 v1, v4, v3
-; P0-GFX12_5-SDAG-NEXT: s_set_pc_i64 s[30:31]
+; GFX9_4-SDAG-LABEL: mul_fneg_nonfsub_noncontractable:
+; GFX9_4-SDAG: ; %bb.0:
+; GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9_4-SDAG-NEXT: v_mul_f32_e32 v1, v0, v1
+; GFX9_4-SDAG-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX9_4-SDAG-NEXT: v_mul_f32_e64 v1, -v1, v3
+; GFX9_4-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX12_5-GISEL-LABEL: mul_fneg_nonfsub_noncontractable:
-; P0-GFX12_5-GISEL: ; %bb.0:
-; P0-GFX12_5-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; P0-GFX12_5-GISEL-NEXT: s_wait_kmcnt 0x0
-; P0-GFX12_5-GISEL-NEXT: v_dual_mul_f32 v4, v0, -v1 :: v_dual_fma_f32 v0, v0, v1, v2
-; P0-GFX12_5-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; P0-GFX12_5-GISEL-NEXT: v_mul_f32_e32 v1, v4, v3
-; P0-GFX12_5-GISEL-NEXT: s_set_pc_i64 s[30:31]
+; GFX9_4-GISEL-LABEL: mul_fneg_nonfsub_noncontractable:
+; GFX9_4-GISEL: ; %bb.0:
+; GFX9_4-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9_4-GISEL-NEXT: v_mul_f32_e32 v1, v0, v1
+; GFX9_4-GISEL-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX9_4-GISEL-NEXT: v_mul_f32_e64 v1, -v1, v3
+; GFX9_4-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9-SDAG-F32DENORM-LABEL: mul_fneg_nonfsub_noncontractable:
-; P0-GFX9-SDAG-F32DENORM: ; %bb.0:
-; P0-GFX9-SDAG-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_mul_f32_e64 v4, v0, -v1
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v2
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_mul_f32_e32 v1, v4, v3
-; P0-GFX9-SDAG-F32DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX12_5-SDAG-LABEL: mul_fneg_nonfsub_noncontractable:
+; GFX12_5-SDAG: ; %bb.0:
+; GFX12_5-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12_5-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12_5-SDAG-NEXT: v_mul_f32_e32 v1, v0, v1
+; GFX12_5-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12_5-SDAG-NEXT: v_dual_add_f32 v0, v1, v2 :: v_dual_mul_f32 v1, -v1, v3
+; GFX12_5-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
-; P0-GFX9-GISEL-F32DENORM-LABEL: mul_fneg_nonfsub_noncontractable:
-; P0-GFX9-GISEL-F32DENORM: ; %bb.0:
-; P0-GFX9-GISEL-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_mul_f32_e64 v4, v0, -v1
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v2
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_mul_f32_e32 v1, v4, v3
-; P0-GFX9-GISEL-F32DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX12_5-GISEL-LABEL: mul_fneg_nonfsub_noncontractable:
+; GFX12_5-GISEL: ; %bb.0:
+; GFX12_5-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12_5-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12_5-GISEL-NEXT: v_mul_f32_e32 v1, v0, v1
+; GFX12_5-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12_5-GISEL-NEXT: v_dual_add_f32 v0, v1, v2 :: v_dual_mul_f32 v1, -v1, v3
+; GFX12_5-GISEL-NEXT: s_set_pc_i64 s[30:31]
%mul = fmul contract float %a, %b
%neg = fneg float %mul
%add = fadd contract float %mul, %c ; contractable
@@ -1112,71 +1074,71 @@ define { float, float } @mul_fneg_nonfsub_noncontractable(float %a, float %b, fl
; Should contract -- all paths are contractable.
; Expected: two fma/mad instructions, no v_mul.
define { float, float } @mul_direct_and_fneg_contractable_uses(float %a, float %b, float %c, float %d) {
-; P0-GFX9-SDAG-F32FLUSH-LABEL: mul_direct_and_fneg_contractable_uses:
-; P0-GFX9-SDAG-F32FLUSH: ; %bb.0:
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v2, v0, v1, v2
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v1, -v0, v1, -v3
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-F32FLUSH-LABEL: mul_direct_and_fneg_contractable_uses:
+; GFX9-SDAG-F32FLUSH: ; %bb.0:
+; GFX9-SDAG-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v2, v0, v1, v2
+; GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v1, -v0, v1, -v3
+; GFX9-SDAG-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-SDAG-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9-GISEL-F32FLUSH-LABEL: mul_direct_and_fneg_contractable_uses:
-; P0-GFX9-GISEL-F32FLUSH: ; %bb.0:
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v2, v0, v1, v2
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v1, v0, -v1, -v3
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-F32FLUSH-LABEL: mul_direct_and_fneg_contractable_uses:
+; GFX9-GISEL-F32FLUSH: ; %bb.0:
+; GFX9-GISEL-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v2, v0, v1, v2
+; GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v1, v0, -v1, -v3
+; GFX9-GISEL-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-GISEL-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9_4-SDAG-LABEL: mul_direct_and_fneg_contractable_uses:
-; P0-GFX9_4-SDAG: ; %bb.0:
-; P0-GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-SDAG-NEXT: v_fma_f32 v2, v0, v1, v2
-; P0-GFX9_4-SDAG-NEXT: v_fma_f32 v1, -v0, v1, -v3
-; P0-GFX9_4-SDAG-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9_4-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9_4-SDAG-LABEL: mul_direct_and_fneg_contractable_uses:
+; GFX9_4-SDAG: ; %bb.0:
+; GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9_4-SDAG-NEXT: v_fma_f32 v2, v0, v1, v2
+; GFX9_4-SDAG-NEXT: v_fma_f32 v1, -v0, v1, -v3
+; GFX9_4-SDAG-NEXT: v_mov_b32_e32 v0, v2
+; GFX9_4-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9_4-GISEL-LABEL: mul_direct_and_fneg_contractable_uses:
-; P0-GFX9_4-GISEL: ; %bb.0:
-; P0-GFX9_4-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-GISEL-NEXT: v_fma_f32 v2, v0, v1, v2
-; P0-GFX9_4-GISEL-NEXT: v_fma_f32 v1, v0, -v1, -v3
-; P0-GFX9_4-GISEL-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9_4-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9_4-GISEL-LABEL: mul_direct_and_fneg_contractable_uses:
+; GFX9_4-GISEL: ; %bb.0:
+; GFX9_4-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9_4-GISEL-NEXT: v_fma_f32 v2, v0, v1, v2
+; GFX9_4-GISEL-NEXT: v_fma_f32 v1, v0, -v1, -v3
+; GFX9_4-GISEL-NEXT: v_mov_b32_e32 v0, v2
+; GFX9_4-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX12_5-SDAG-LABEL: mul_direct_and_fneg_contractable_uses:
-; P0-GFX12_5-SDAG: ; %bb.0:
-; P0-GFX12_5-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; P0-GFX12_5-SDAG-NEXT: s_wait_kmcnt 0x0
-; P0-GFX12_5-SDAG-NEXT: v_dual_fma_f32 v2, v0, v1, v2 :: v_dual_fma_f32 v1, -v0, v1, -v3
-; P0-GFX12_5-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; P0-GFX12_5-SDAG-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX12_5-SDAG-NEXT: s_set_pc_i64 s[30:31]
+; GFX12_5-SDAG-LABEL: mul_direct_and_fneg_contractable_uses:
+; GFX12_5-SDAG: ; %bb.0:
+; GFX12_5-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12_5-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12_5-SDAG-NEXT: v_dual_fma_f32 v2, v0, v1, v2 :: v_dual_fma_f32 v1, -v0, v1, -v3
+; GFX12_5-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12_5-SDAG-NEXT: v_mov_b32_e32 v0, v2
+; GFX12_5-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
-; P0-GFX12_5-GISEL-LABEL: mul_direct_and_fneg_contractable_uses:
-; P0-GFX12_5-GISEL: ; %bb.0:
-; P0-GFX12_5-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; P0-GFX12_5-GISEL-NEXT: s_wait_kmcnt 0x0
-; P0-GFX12_5-GISEL-NEXT: v_dual_fma_f32 v2, v0, v1, v2 :: v_dual_fma_f32 v1, v0, -v1, -v3
-; P0-GFX12_5-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; P0-GFX12_5-GISEL-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX12_5-GISEL-NEXT: s_set_pc_i64 s[30:31]
+; GFX12_5-GISEL-LABEL: mul_direct_and_fneg_contractable_uses:
+; GFX12_5-GISEL: ; %bb.0:
+; GFX12_5-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12_5-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12_5-GISEL-NEXT: v_dual_fma_f32 v2, v0, v1, v2 :: v_dual_fma_f32 v1, v0, -v1, -v3
+; GFX12_5-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12_5-GISEL-NEXT: v_mov_b32_e32 v0, v2
+; GFX12_5-GISEL-NEXT: s_set_pc_i64 s[30:31]
;
-; P0-GFX9-SDAG-F32DENORM-LABEL: mul_direct_and_fneg_contractable_uses:
-; P0-GFX9-SDAG-F32DENORM: ; %bb.0:
-; P0-GFX9-SDAG-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v2, v0, v1, v2
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v1, -v0, v1, -v3
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-SDAG-F32DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-F32DENORM-LABEL: mul_direct_and_fneg_contractable_uses:
+; GFX9-SDAG-F32DENORM: ; %bb.0:
+; GFX9-SDAG-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v2, v0, v1, v2
+; GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v1, -v0, v1, -v3
+; GFX9-SDAG-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-SDAG-F32DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9-GISEL-F32DENORM-LABEL: mul_direct_and_fneg_contractable_uses:
-; P0-GFX9-GISEL-F32DENORM: ; %bb.0:
-; P0-GFX9-GISEL-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v2, v0, v1, v2
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v1, v0, -v1, -v3
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-GISEL-F32DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-F32DENORM-LABEL: mul_direct_and_fneg_contractable_uses:
+; GFX9-GISEL-F32DENORM: ; %bb.0:
+; GFX9-GISEL-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v2, v0, v1, v2
+; GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v1, v0, -v1, -v3
+; GFX9-GISEL-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-GISEL-F32DENORM-NEXT: s_setpc_b64 s[30:31]
%mul = fmul contract float %a, %b
%add = fadd contract float %mul, %c ; contractable
%neg = fneg contract float %mul
@@ -1192,71 +1154,71 @@ define { float, float } @mul_direct_and_fneg_contractable_uses(float %a, float %
; Should contract -- all paths are contractable.
; Expected: two fma/mad instructions, no v_mul.
define { float, float } @mul_fsub_and_fneg_fsub_contractable(float %a, float %b, float %c, float %d) {
-; P0-GFX9-SDAG-F32FLUSH-LABEL: mul_fsub_and_fneg_fsub_contractable:
-; P0-GFX9-SDAG-F32FLUSH: ; %bb.0:
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v2, v0, v1, -v2
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v1, -v0, v1, -v3
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-F32FLUSH-LABEL: mul_fsub_and_fneg_fsub_contractable:
+; GFX9-SDAG-F32FLUSH: ; %bb.0:
+; GFX9-SDAG-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v2, v0, v1, -v2
+; GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v1, -v0, v1, -v3
+; GFX9-SDAG-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-SDAG-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9-GISEL-F32FLUSH-LABEL: mul_fsub_and_fneg_fsub_contractable:
-; P0-GFX9-GISEL-F32FLUSH: ; %bb.0:
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v2, v0, v1, -v2
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v1, v0, -v1, -v3
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-F32FLUSH-LABEL: mul_fsub_and_fneg_fsub_contractable:
+; GFX9-GISEL-F32FLUSH: ; %bb.0:
+; GFX9-GISEL-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v2, v0, v1, -v2
+; GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v1, v0, -v1, -v3
+; GFX9-GISEL-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-GISEL-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9_4-SDAG-LABEL: mul_fsub_and_fneg_fsub_contractable:
-; P0-GFX9_4-SDAG: ; %bb.0:
-; P0-GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-SDAG-NEXT: v_fma_f32 v2, v0, v1, -v2
-; P0-GFX9_4-SDAG-NEXT: v_fma_f32 v1, -v0, v1, -v3
-; P0-GFX9_4-SDAG-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9_4-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9_4-SDAG-LABEL: mul_fsub_and_fneg_fsub_contractable:
+; GFX9_4-SDAG: ; %bb.0:
+; GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9_4-SDAG-NEXT: v_fma_f32 v2, v0, v1, -v2
+; GFX9_4-SDAG-NEXT: v_fma_f32 v1, -v0, v1, -v3
+; GFX9_4-SDAG-NEXT: v_mov_b32_e32 v0, v2
+; GFX9_4-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9_4-GISEL-LABEL: mul_fsub_and_fneg_fsub_contractable:
-; P0-GFX9_4-GISEL: ; %bb.0:
-; P0-GFX9_4-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-GISEL-NEXT: v_fma_f32 v2, v0, v1, -v2
-; P0-GFX9_4-GISEL-NEXT: v_fma_f32 v1, v0, -v1, -v3
-; P0-GFX9_4-GISEL-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9_4-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9_4-GISEL-LABEL: mul_fsub_and_fneg_fsub_contractable:
+; GFX9_4-GISEL: ; %bb.0:
+; GFX9_4-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9_4-GISEL-NEXT: v_fma_f32 v2, v0, v1, -v2
+; GFX9_4-GISEL-NEXT: v_fma_f32 v1, v0, -v1, -v3
+; GFX9_4-GISEL-NEXT: v_mov_b32_e32 v0, v2
+; GFX9_4-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX12_5-SDAG-LABEL: mul_fsub_and_fneg_fsub_contractable:
-; P0-GFX12_5-SDAG: ; %bb.0:
-; P0-GFX12_5-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; P0-GFX12_5-SDAG-NEXT: s_wait_kmcnt 0x0
-; P0-GFX12_5-SDAG-NEXT: v_dual_fma_f32 v2, v0, v1, -v2 :: v_dual_fma_f32 v1, -v0, v1, -v3
-; P0-GFX12_5-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; P0-GFX12_5-SDAG-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX12_5-SDAG-NEXT: s_set_pc_i64 s[30:31]
+; GFX12_5-SDAG-LABEL: mul_fsub_and_fneg_fsub_contractable:
+; GFX12_5-SDAG: ; %bb.0:
+; GFX12_5-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12_5-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12_5-SDAG-NEXT: v_dual_fma_f32 v2, v0, v1, -v2 :: v_dual_fma_f32 v1, -v0, v1, -v3
+; GFX12_5-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12_5-SDAG-NEXT: v_mov_b32_e32 v0, v2
+; GFX12_5-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
-; P0-GFX12_5-GISEL-LABEL: mul_fsub_and_fneg_fsub_contractable:
-; P0-GFX12_5-GISEL: ; %bb.0:
-; P0-GFX12_5-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; P0-GFX12_5-GISEL-NEXT: s_wait_kmcnt 0x0
-; P0-GFX12_5-GISEL-NEXT: v_dual_fma_f32 v2, v0, v1, -v2 :: v_dual_fma_f32 v1, v0, -v1, -v3
-; P0-GFX12_5-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; P0-GFX12_5-GISEL-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX12_5-GISEL-NEXT: s_set_pc_i64 s[30:31]
+; GFX12_5-GISEL-LABEL: mul_fsub_and_fneg_fsub_contractable:
+; GFX12_5-GISEL: ; %bb.0:
+; GFX12_5-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12_5-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12_5-GISEL-NEXT: v_dual_fma_f32 v2, v0, v1, -v2 :: v_dual_fma_f32 v1, v0, -v1, -v3
+; GFX12_5-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12_5-GISEL-NEXT: v_mov_b32_e32 v0, v2
+; GFX12_5-GISEL-NEXT: s_set_pc_i64 s[30:31]
;
-; P0-GFX9-SDAG-F32DENORM-LABEL: mul_fsub_and_fneg_fsub_contractable:
-; P0-GFX9-SDAG-F32DENORM: ; %bb.0:
-; P0-GFX9-SDAG-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v2, v0, v1, -v2
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v1, -v0, v1, -v3
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-SDAG-F32DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-F32DENORM-LABEL: mul_fsub_and_fneg_fsub_contractable:
+; GFX9-SDAG-F32DENORM: ; %bb.0:
+; GFX9-SDAG-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v2, v0, v1, -v2
+; GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v1, -v0, v1, -v3
+; GFX9-SDAG-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-SDAG-F32DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9-GISEL-F32DENORM-LABEL: mul_fsub_and_fneg_fsub_contractable:
-; P0-GFX9-GISEL-F32DENORM: ; %bb.0:
-; P0-GFX9-GISEL-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v2, v0, v1, -v2
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v1, v0, -v1, -v3
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-GISEL-F32DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-F32DENORM-LABEL: mul_fsub_and_fneg_fsub_contractable:
+; GFX9-GISEL-F32DENORM: ; %bb.0:
+; GFX9-GISEL-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v2, v0, v1, -v2
+; GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v1, v0, -v1, -v3
+; GFX9-GISEL-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-GISEL-F32DENORM-NEXT: s_setpc_b64 s[30:31]
%mul = fmul contract float %a, %b
%sub1 = fsub contract float %mul, %c ; contractable
%neg = fneg contract float %mul
diff --git a/llvm/test/CodeGen/AMDGPU/mad-combine.ll b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
index a1bbe00635ed9..cf6732d30d080 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
@@ -960,12 +960,11 @@ define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_mul(ptr addrspace(1)
; SI-STD-NEXT: buffer_load_dword v5, v[0:1], s[4:7], 0 addr64 offset:12 glc
; SI-STD-NEXT: s_waitcnt vmcnt(0)
; SI-STD-NEXT: s_mov_b64 s[2:3], s[6:7]
-; SI-STD-NEXT: v_mul_f32_e32 v6, v2, v3
-; SI-STD-NEXT: v_mad_f32 v2, -v2, v3, -v4
-; SI-STD-NEXT: v_sub_f32_e32 v3, v6, v5
-; SI-STD-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-STD-NEXT: v_mad_f32 v4, -v2, v3, -v4
+; SI-STD-NEXT: v_mad_f32 v2, v2, v3, -v5
+; SI-STD-NEXT: buffer_store_dword v4, v[0:1], s[0:3], 0 addr64
; SI-STD-NEXT: s_waitcnt vmcnt(0)
-; SI-STD-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64 offset:4
+; SI-STD-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:4
; SI-STD-NEXT: s_waitcnt vmcnt(0)
; SI-STD-NEXT: s_endpgm
;
More information about the llvm-branch-commits
mailing list