[llvm] [SLP] Guard FMulAdd conversion to require single-use/non-reordered FMul operands (PR #189692)

via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 31 08:25:12 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers

@llvm/pr-subscribers-llvm-transforms

Author: Alexey Bataev (alexey-bataev)

<details>
<summary>Changes</summary>

The FMulAdd (CombinedVectorize) transformation in transformNodes() marks
an FMul child entry with zero cost, assuming it is fully absorbed into
the fmuladd intrinsic. However, when any FMul scalar has multiple uses
(e.g., also stored separately), the FMul must survive as a separate
node.


---
Full diff: https://github.com/llvm/llvm-project/pull/189692.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+16) 
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/fma-conversion-multi-use-guard.ll (+6-14) 


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e48b68fafd806..b3fbfc5b3ee93 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14351,6 +14351,22 @@ void BoUpSLP::transformNodes() {
       if (E.State != TreeEntry::Vectorize ||
           !E.getOperations().isAddSubLikeOp())
         break;
+      const TreeEntry *LHS = getOperandEntry(&E, 0);
+      const TreeEntry *RHS = getOperandEntry(&E, 1);
+      auto IsOneUseVectorFMulOperand = [](const TreeEntry *TE) {
+        return TE->State == TreeEntry::Vectorize &&
+               TE->ReorderIndices.empty() && TE->ReuseShuffleIndices.empty() &&
+               TE->getOpcode() == Instruction::FMul && !TE->isAltShuffle() &&
+               all_of(TE->Scalars, [&](Value *V) {
+                 return (TE->hasCopyableElements() &&
+                         TE->isCopyableElement(V)) ||
+                        V->hasOneUse();
+               });
+      };
+      if (!IsOneUseVectorFMulOperand(LHS) &&
+          (E.getOpcode() == Instruction::Sub ||
+           !IsOneUseVectorFMulOperand(RHS)))
+        break;
       if (!canConvertToFMA(E.Scalars, E.getOperations(), *DT, *DL, *TTI, *TLI)
                .isValid())
         break;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/fma-conversion-multi-use-guard.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/fma-conversion-multi-use-guard.ll
index 624c24a99859f..2acbebf259ee9 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/fma-conversion-multi-use-guard.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/fma-conversion-multi-use-guard.ll
@@ -47,20 +47,12 @@ entry:
 define void @mixed_use_v4(ptr %src0, ptr %src1, ptr %dst, ptr %extra) {
 ; CHECK-LABEL: @mixed_use_v4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP_S0_2:%.*]] = getelementptr inbounds float, ptr [[SRC0:%.*]], i64 2
-; CHECK-NEXT:    [[GEP_S1_2:%.*]] = getelementptr inbounds float, ptr [[SRC1:%.*]], i64 2
-; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[SRC0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[SRC1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <2 x float> [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <2 x float> [[TMP2]], splat (float 1.000000e+00)
-; CHECK-NEXT:    [[GEP_D_2:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 2
-; CHECK-NEXT:    [[TMP8:%.*]] = load <2 x float>, ptr [[GEP_S0_2]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x float>, ptr [[GEP_S1_2]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <2 x float> [[TMP8]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = fadd fast <2 x float> [[TMP6]], splat (float 1.000000e+00)
-; CHECK-NEXT:    store <2 x float> [[TMP3]], ptr [[DST]], align 4
-; CHECK-NEXT:    store <2 x float> [[TMP7]], ptr [[GEP_D_2]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[SRC0:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[SRC1:%.*]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], splat (float 1.000000e+00)
+; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[DST:%.*]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
 ; CHECK-NEXT:    store float [[TMP4]], ptr [[EXTRA:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;

``````````

</details>


https://github.com/llvm/llvm-project/pull/189692


More information about the llvm-commits mailing list