[llvm-branch-commits] [llvm] [DAGCombiner][GlobalISel] Extend allMulUsesCanBeContracted with FNEG pattern (PR #188115)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Apr 3 08:57:17 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
Author: Adel Ejjeh (adelejjeh)
<details>
<summary>Changes</summary>
Supersedes #169735 (PR 3/5). Split into a stack of 5 PRs per reviewer request.
---
Extend `allMulUsesCanBeContracted()` to recognize `fmul -> fneg -> fsub` chains as contractable uses. This allows FMA contraction when a multiply feeds an fneg that is only used by fsub operations.
Changes:
- **DAGCombiner.cpp**: Add `ISD::FNEG` case to `allMulUsesCanBeContracted()` checking that all FNEG users are `ISD::FSUB`. Update 1 fold site guard in `visitFSUBForFMACombine` (`fsub(fneg(fmul))`).
- **CombinerHelper.cpp**: Add `G_FNEG` case to `allMulUsesCanBeContracted()` checking that all FNEG users are `G_FSUB`. Update 2 fold site guards in `matchCombineFSubFNegFMulToFMadOrFMA`. Fix guard ordering to check `isContractableFMul` before `allMulUsesCanBeContracted` (cheap first).
Note: FADD is intentionally not checked as an FNEG user because `fadd(fneg(x), y)` is canonicalized to `fsub(y, x)` before the FMA combine runs, in both SDAG (`visitFADD`) and GISel (`redundant_neg_operands`). FPEXT inside FNEG chains is deferred to the next patch.
### Test Changes
#### `fma-multiple-uses-contraction.ll`
The FNEG patterns section is activated: `P0-` prefixed baseline checks are replaced with real-prefix CHECK lines reflecting the guard changes from this PR. Remaining sections (FPEXT, FMA/FMAD) retain `P0-` prefixed baseline checks, activated by later PRs.
#### `mad-combine.ll`
`combine_to_mad_fsub_2_f32_2uses_mul`: CHECK lines revert to pre-stack (upstream) behavior. This function's `fmul` feeds an `fneg` used by `fsub`, which is now recognized as contractable by the FNEG guard added in this PR, restoring the contraction that was temporarily blocked by PR 2/5.
---
_This PR was split from the original #169735 with the assistance of [Claude Code](https://claude.ai/code)._
---
Patch is 50.65 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/188115.diff
4 Files Affected:
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+25-7)
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+18-2)
- (modified) llvm/test/CodeGen/AMDGPU/fma-multiple-uses-contraction.ll (+362-400)
- (modified) llvm/test/CodeGen/AMDGPU/mad-combine.ll (+4-5)
``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 5fdba8cd4ab99..2e9355013e84d 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6316,8 +6316,9 @@ static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
/// would duplicate the multiply without reducing the total number of
/// operations.
///
-/// Currently checks for the following pattern:
+/// Currently checks for the following patterns:
/// - fmul --> fadd/fsub: Direct contraction
+/// - fmul --> fneg --> fsub: Contraction through fneg
bool CombinerHelper::allMulUsesCanBeContracted(const MachineInstr &MI) const {
Register MulReg = MI.getOperand(0).getReg();
@@ -6328,6 +6329,17 @@ bool CombinerHelper::allMulUsesCanBeContracted(const MachineInstr &MI) const {
if (Opcode == TargetOpcode::G_FADD || Opcode == TargetOpcode::G_FSUB)
continue;
+ // G_FNEG use - contractable if all users of the fneg are G_FSUB.
+ if (Opcode == TargetOpcode::G_FNEG) {
+ Register FNegReg = UseMI.getOperand(0).getReg();
+ for (const MachineInstr &FNegUser : MRI.use_nodbg_instructions(FNegReg)) {
+ unsigned FNegUserOp = FNegUser.getOpcode();
+ if (FNegUserOp != TargetOpcode::G_FSUB)
+ return false;
+ }
+ continue;
+ }
+
// Any other use type is not currently recognized as contractable.
return false;
}
@@ -6744,9 +6756,10 @@ bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
MachineInstr *FMulMI;
// fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
- (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
- MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
- isContractableFMul(*FMulMI, AllowFusionGlobally)) {
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ ((MRI.hasOneNonDBGUse(LHSReg) &&
+ MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg())) ||
+ (Aggressive && allMulUsesCanBeContracted(*FMulMI)))) {
MatchInfo = [=, &MI](MachineIRBuilder &B) {
Register NegX =
B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
@@ -6758,10 +6771,15 @@ bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
}
// fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
+ // Note: In the standard combiner ordering, redundant_neg_operands
+ // canonicalizes fsub(x, fneg(y)) -> fadd(x, y) before fma_combines runs,
+ // so this fold may not fire in practice. It is kept as defensive code
+ // against combiner reordering.
if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
- (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
- MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
- isContractableFMul(*FMulMI, AllowFusionGlobally)) {
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ ((MRI.hasOneNonDBGUse(RHSReg) &&
+ MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg())) ||
+ (Aggressive && allMulUsesCanBeContracted(*FMulMI)))) {
MatchInfo = [=, &MI](MachineIRBuilder &B) {
B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
{FMulMI->getOperand(1).getReg(),
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 38cacf0d52182..3d34a1226df27 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17685,8 +17685,9 @@ static bool isFusedOp(const MatchContextClass &Matcher, SDValue N) {
/// would duplicate the multiply without reducing the total number of
/// operations.
///
-/// Currently checks for the following pattern:
+/// Currently checks for the following patterns:
/// - fmul --> fadd/fsub: Direct contraction
+/// - fmul --> fneg --> fsub: Contraction through fneg
static bool allMulUsesCanBeContracted(SDValue Mul) {
for (const auto *User : Mul->users()) {
unsigned Opcode = User->getOpcode();
@@ -17695,6 +17696,16 @@ static bool allMulUsesCanBeContracted(SDValue Mul) {
if (Opcode == ISD::FADD || Opcode == ISD::FSUB)
continue;
+ // FNEG use - contractable if all users of the fneg are FSUB.
+ if (Opcode == ISD::FNEG) {
+ for (const auto *FNegUser : User->users()) {
+ unsigned FNegUserOp = FNegUser->getOpcode();
+ if (FNegUserOp != ISD::FSUB)
+ return false;
+ }
+ continue;
+ }
+
// Any other use type is not currently recognized as contractable.
return false;
}
@@ -18030,8 +18041,13 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
+ // Note: SDAG does not need the symmetric fold (fsub x, (fneg (fmul y, z)))
+ // because visitFSUB canonicalizes fsub(A, fneg(B)) -> fadd(A, B) before
+ // calling visitFSUBForFMACombine, so that pattern is handled by
+ // visitFADDForFMACombine instead.
if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
- (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
+ ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) ||
+ (Aggressive && allMulUsesCanBeContracted(N0.getOperand(0))))) {
SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);
return matcher.getNode(PreferredFusedOpcode, SL, VT,
diff --git a/llvm/test/CodeGen/AMDGPU/fma-multiple-uses-contraction.ll b/llvm/test/CodeGen/AMDGPU/fma-multiple-uses-contraction.ll
index 803561b2b29a6..99dabe9961033 100644
--- a/llvm/test/CodeGen/AMDGPU/fma-multiple-uses-contraction.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma-multiple-uses-contraction.ll
@@ -714,15 +714,9 @@ define { float, float, float } @mul_three_contractable_uses(float %a, float %b,
ret { float, float, float } %ret2
}
-
; ==========================================================================
; FNEG patterns
; Tests for allMulUsesCanBeContracted recognizing fneg as a transparent user.
-;
-; NOTE: The allMulUsesCanBeContracted guard does not yet recognize fneg as
-; transparent. That support is added by the next patch in the series. Until
-; then, the CHECK lines below reflect current (potentially over-conservative)
-; codegen and may not match the "Expected:" comments on individual tests.
; ==========================================================================
; Test case: fmul -> fneg -> fsub (single use chain).
@@ -731,55 +725,55 @@ define { float, float, float } @mul_three_contractable_uses(float %a, float %b,
; Should contract -- single-use chain, fneg folds into fma.
; Expected: single fma/mad, no v_mul.
define float @mul_fneg_fsub_single_use(float %a, float %b, float %c) {
-; P0-GFX9-SDAG-F32FLUSH-LABEL: mul_fneg_fsub_single_use:
-; P0-GFX9-SDAG-F32FLUSH: ; %bb.0:
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v0, v0, -v1, -v2
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-F32FLUSH-LABEL: mul_fneg_fsub_single_use:
+; GFX9-SDAG-F32FLUSH: ; %bb.0:
+; GFX9-SDAG-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v0, v0, -v1, -v2
+; GFX9-SDAG-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9-GISEL-F32FLUSH-LABEL: mul_fneg_fsub_single_use:
-; P0-GFX9-GISEL-F32FLUSH: ; %bb.0:
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v0, v0, -v1, -v2
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-F32FLUSH-LABEL: mul_fneg_fsub_single_use:
+; GFX9-GISEL-F32FLUSH: ; %bb.0:
+; GFX9-GISEL-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v0, v0, -v1, -v2
+; GFX9-GISEL-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9_4-SDAG-LABEL: mul_fneg_fsub_single_use:
-; P0-GFX9_4-SDAG: ; %bb.0:
-; P0-GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-SDAG-NEXT: v_fma_f32 v0, -v0, v1, -v2
-; P0-GFX9_4-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9_4-SDAG-LABEL: mul_fneg_fsub_single_use:
+; GFX9_4-SDAG: ; %bb.0:
+; GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9_4-SDAG-NEXT: v_fma_f32 v0, -v0, v1, -v2
+; GFX9_4-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9_4-GISEL-LABEL: mul_fneg_fsub_single_use:
-; P0-GFX9_4-GISEL: ; %bb.0:
-; P0-GFX9_4-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-GISEL-NEXT: v_fma_f32 v0, v0, -v1, -v2
-; P0-GFX9_4-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9_4-GISEL-LABEL: mul_fneg_fsub_single_use:
+; GFX9_4-GISEL: ; %bb.0:
+; GFX9_4-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9_4-GISEL-NEXT: v_fma_f32 v0, v0, -v1, -v2
+; GFX9_4-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX12_5-SDAG-LABEL: mul_fneg_fsub_single_use:
-; P0-GFX12_5-SDAG: ; %bb.0:
-; P0-GFX12_5-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; P0-GFX12_5-SDAG-NEXT: s_wait_kmcnt 0x0
-; P0-GFX12_5-SDAG-NEXT: v_fma_f32 v0, -v0, v1, -v2
-; P0-GFX12_5-SDAG-NEXT: s_set_pc_i64 s[30:31]
+; GFX12_5-SDAG-LABEL: mul_fneg_fsub_single_use:
+; GFX12_5-SDAG: ; %bb.0:
+; GFX12_5-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12_5-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12_5-SDAG-NEXT: v_fma_f32 v0, -v0, v1, -v2
+; GFX12_5-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
-; P0-GFX12_5-GISEL-LABEL: mul_fneg_fsub_single_use:
-; P0-GFX12_5-GISEL: ; %bb.0:
-; P0-GFX12_5-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; P0-GFX12_5-GISEL-NEXT: s_wait_kmcnt 0x0
-; P0-GFX12_5-GISEL-NEXT: v_fma_f32 v0, v0, -v1, -v2
-; P0-GFX12_5-GISEL-NEXT: s_set_pc_i64 s[30:31]
+; GFX12_5-GISEL-LABEL: mul_fneg_fsub_single_use:
+; GFX12_5-GISEL: ; %bb.0:
+; GFX12_5-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12_5-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12_5-GISEL-NEXT: v_fma_f32 v0, v0, -v1, -v2
+; GFX12_5-GISEL-NEXT: s_set_pc_i64 s[30:31]
;
-; P0-GFX9-SDAG-F32DENORM-LABEL: mul_fneg_fsub_single_use:
-; P0-GFX9-SDAG-F32DENORM: ; %bb.0:
-; P0-GFX9-SDAG-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v0, -v0, v1, -v2
-; P0-GFX9-SDAG-F32DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-F32DENORM-LABEL: mul_fneg_fsub_single_use:
+; GFX9-SDAG-F32DENORM: ; %bb.0:
+; GFX9-SDAG-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v0, -v0, v1, -v2
+; GFX9-SDAG-F32DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9-GISEL-F32DENORM-LABEL: mul_fneg_fsub_single_use:
-; P0-GFX9-GISEL-F32DENORM: ; %bb.0:
-; P0-GFX9-GISEL-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v0, v0, -v1, -v2
-; P0-GFX9-GISEL-F32DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-F32DENORM-LABEL: mul_fneg_fsub_single_use:
+; GFX9-GISEL-F32DENORM: ; %bb.0:
+; GFX9-GISEL-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v0, v0, -v1, -v2
+; GFX9-GISEL-F32DENORM-NEXT: s_setpc_b64 s[30:31]
%mul = fmul contract float %a, %b
%neg = fneg contract float %mul
%sub = fsub contract float %neg, %c ; contractable
@@ -792,71 +786,71 @@ define float @mul_fneg_fsub_single_use(float %a, float %b, float %c) {
; Should contract -- all fneg uses are contractable fsubs.
; Expected: two fma/mad instructions, no v_mul.
define { float, float } @mul_fneg_multiple_fsub_uses(float %a, float %b, float %c, float %d) {
-; P0-GFX9-SDAG-F32FLUSH-LABEL: mul_fneg_multiple_fsub_uses:
-; P0-GFX9-SDAG-F32FLUSH: ; %bb.0:
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v2, v0, -v1, -v2
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v1, v0, -v1, -v3
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-F32FLUSH-LABEL: mul_fneg_multiple_fsub_uses:
+; GFX9-SDAG-F32FLUSH: ; %bb.0:
+; GFX9-SDAG-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v2, v0, -v1, -v2
+; GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v1, v0, -v1, -v3
+; GFX9-SDAG-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-SDAG-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9-GISEL-F32FLUSH-LABEL: mul_fneg_multiple_fsub_uses:
-; P0-GFX9-GISEL-F32FLUSH: ; %bb.0:
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v2, v0, -v1, -v2
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v1, v0, -v1, -v3
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-F32FLUSH-LABEL: mul_fneg_multiple_fsub_uses:
+; GFX9-GISEL-F32FLUSH: ; %bb.0:
+; GFX9-GISEL-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v2, v0, -v1, -v2
+; GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v1, v0, -v1, -v3
+; GFX9-GISEL-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-GISEL-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9_4-SDAG-LABEL: mul_fneg_multiple_fsub_uses:
-; P0-GFX9_4-SDAG: ; %bb.0:
-; P0-GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-SDAG-NEXT: v_fma_f32 v2, -v0, v1, -v2
-; P0-GFX9_4-SDAG-NEXT: v_fma_f32 v1, -v0, v1, -v3
-; P0-GFX9_4-SDAG-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9_4-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9_4-SDAG-LABEL: mul_fneg_multiple_fsub_uses:
+; GFX9_4-SDAG: ; %bb.0:
+; GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9_4-SDAG-NEXT: v_fma_f32 v2, -v0, v1, -v2
+; GFX9_4-SDAG-NEXT: v_fma_f32 v1, -v0, v1, -v3
+; GFX9_4-SDAG-NEXT: v_mov_b32_e32 v0, v2
+; GFX9_4-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9_4-GISEL-LABEL: mul_fneg_multiple_fsub_uses:
-; P0-GFX9_4-GISEL: ; %bb.0:
-; P0-GFX9_4-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-GISEL-NEXT: v_fma_f32 v2, v0, -v1, -v2
-; P0-GFX9_4-GISEL-NEXT: v_fma_f32 v1, v0, -v1, -v3
-; P0-GFX9_4-GISEL-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9_4-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9_4-GISEL-LABEL: mul_fneg_multiple_fsub_uses:
+; GFX9_4-GISEL: ; %bb.0:
+; GFX9_4-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9_4-GISEL-NEXT: v_fma_f32 v2, v0, -v1, -v2
+; GFX9_4-GISEL-NEXT: v_fma_f32 v1, v0, -v1, -v3
+; GFX9_4-GISEL-NEXT: v_mov_b32_e32 v0, v2
+; GFX9_4-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX12_5-SDAG-LABEL: mul_fneg_multiple_fsub_uses:
-; P0-GFX12_5-SDAG: ; %bb.0:
-; P0-GFX12_5-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; P0-GFX12_5-SDAG-NEXT: s_wait_kmcnt 0x0
-; P0-GFX12_5-SDAG-NEXT: v_dual_fma_f32 v2, -v0, v1, -v2 :: v_dual_fma_f32 v1, -v0, v1, -v3
-; P0-GFX12_5-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; P0-GFX12_5-SDAG-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX12_5-SDAG-NEXT: s_set_pc_i64 s[30:31]
+; GFX12_5-SDAG-LABEL: mul_fneg_multiple_fsub_uses:
+; GFX12_5-SDAG: ; %bb.0:
+; GFX12_5-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12_5-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12_5-SDAG-NEXT: v_dual_fma_f32 v2, -v0, v1, -v2 :: v_dual_fma_f32 v1, -v0, v1, -v3
+; GFX12_5-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12_5-SDAG-NEXT: v_mov_b32_e32 v0, v2
+; GFX12_5-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
-; P0-GFX12_5-GISEL-LABEL: mul_fneg_multiple_fsub_uses:
-; P0-GFX12_5-GISEL: ; %bb.0:
-; P0-GFX12_5-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; P0-GFX12_5-GISEL-NEXT: s_wait_kmcnt 0x0
-; P0-GFX12_5-GISEL-NEXT: v_dual_fma_f32 v2, v0, -v1, -v2 :: v_dual_fma_f32 v1, v0, -v1, -v3
-; P0-GFX12_5-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; P0-GFX12_5-GISEL-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX12_5-GISEL-NEXT: s_set_pc_i64 s[30:31]
+; GFX12_5-GISEL-LABEL: mul_fneg_multiple_fsub_uses:
+; GFX12_5-GISEL: ; %bb.0:
+; GFX12_5-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12_5-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12_5-GISEL-NEXT: v_dual_fma_f32 v2, v0, -v1, -v2 :: v_dual_fma_f32 v1, v0, -v1, -v3
+; GFX12_5-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12_5-GISEL-NEXT: v_mov_b32_e32 v0, v2
+; GFX12_5-GISEL-NEXT: s_set_pc_i64 s[30:31]
;
-; P0-GFX9-SDAG-F32DENORM-LABEL: mul_fneg_multiple_fsub_uses:
-; P0-GFX9-SDAG-F32DENORM: ; %bb.0:
-; P0-GFX9-SDAG-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v2, -v0, v1, -v2
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v1, -v0, v1, -v3
-; P0-GFX9-SDAG-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-SDAG-F32DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-F32DENORM-LABEL: mul_fneg_multiple_fsub_uses:
+; GFX9-SDAG-F32DENORM: ; %bb.0:
+; GFX9-SDAG-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v2, -v0, v1, -v2
+; GFX9-SDAG-F32DENORM-NEXT: v_fma_f32 v1, -v0, v1, -v3
+; GFX9-SDAG-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-SDAG-F32DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9-GISEL-F32DENORM-LABEL: mul_fneg_multiple_fsub_uses:
-; P0-GFX9-GISEL-F32DENORM: ; %bb.0:
-; P0-GFX9-GISEL-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v2, v0, -v1, -v2
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v1, v0, -v1, -v3
-; P0-GFX9-GISEL-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-GISEL-F32DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-F32DENORM-LABEL: mul_fneg_multiple_fsub_uses:
+; GFX9-GISEL-F32DENORM: ; %bb.0:
+; GFX9-GISEL-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v2, v0, -v1, -v2
+; GFX9-GISEL-F32DENORM-NEXT: v_fma_f32 v1, v0, -v1, -v3
+; GFX9-GISEL-F32DENORM-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-GISEL-F32DENORM-NEXT: s_setpc_b64 s[30:31]
%mul = fmul contract float %a, %b
%neg = fneg contract float %mul
%sub1 = fsub contract float %neg, %c ; contractable
@@ -872,71 +866,71 @@ define { float, float } @mul_fneg_multiple_fsub_uses(float %a, float %b, float %
; Should contract -- both fneg uses (fsub, fadd) are contractable.
; Expected: two fma/mad instructions, no v_mul.
define { float, float } @mul_fneg_mixed_uses(float %a, float %b, float %c, float %d) {
-; P0-GFX9-SDAG-F32FLUSH-LABEL: mul_fneg_mixed_uses:
-; P0-GFX9-SDAG-F32FLUSH: ; %bb.0:
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v2, -v0, v1, -v2
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v1, -v0, v1, v3
-; P0-GFX9-SDAG-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-SDAG-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-F32FLUSH-LABEL: mul_fneg_mixed_uses:
+; GFX9-SDAG-F32FLUSH: ; %bb.0:
+; GFX9-SDAG-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v2, -v0, v1, -v2
+; GFX9-SDAG-F32FLUSH-NEXT: v_mad_f32 v1, -v0, v1, v3
+; GFX9-SDAG-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-SDAG-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9-GISEL-F32FLUSH-LABEL: mul_fneg_mixed_uses:
-; P0-GFX9-GISEL-F32FLUSH: ; %bb.0:
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v2, v0, -v1, -v2
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v1, v0, -v1, v3
-; P0-GFX9-GISEL-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
-; P0-GFX9-GISEL-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-F32FLUSH-LABEL: mul_fneg_mixed_uses:
+; GFX9-GISEL-F32FLUSH: ; %bb.0:
+; GFX9-GISEL-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v2, v0, -v1, -v2
+; GFX9-GISEL-F32FLUSH-NEXT: v_mad_f32 v1, v0, -v1, v3
+; GFX9-GISEL-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-GISEL-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
;
-; P0-GFX9_4-SDAG-LABEL: mul_fneg_mixed_uses:
-; P0-GFX9_4-SDAG: ; %bb.0:
-; P0-GFX9_4-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; P0-GFX9_4-SDAG-NEXT: v_fma_f32 v2, -v0, v1, -v2
-; P0-GFX9_4-SDAG-NEXT: v_fma_f32 v1, -v0, v1, v3
-; P0-GFX9_4-SDAG-NEXT: v_mov_b32_e32 v0, v2
-; P...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/188115
More information about the llvm-branch-commits
mailing list