[llvm] [SLP] Guard FMulAdd conversion to require single-use/non-reordered FMul operands (PR #189692)

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 31 08:45:23 PDT 2026


https://github.com/alexey-bataev updated https://github.com/llvm/llvm-project/pull/189692

>From 1f7a67f32fed55ca7d2aa3dd3124cc040c2ccb9b Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Tue, 31 Mar 2026 08:23:54 -0700
Subject: [PATCH 1/2] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.7
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 16 +++++++++++++++
 .../AArch64/fma-conversion-multi-use-guard.ll | 20 ++++++-------------
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e48b68fafd806..b3fbfc5b3ee93 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14351,6 +14351,22 @@ void BoUpSLP::transformNodes() {
       if (E.State != TreeEntry::Vectorize ||
           !E.getOperations().isAddSubLikeOp())
         break;
+      const TreeEntry *LHS = getOperandEntry(&E, 0);
+      const TreeEntry *RHS = getOperandEntry(&E, 1);
+      auto IsOneUseVectorFMulOperand = [](const TreeEntry *TE) {
+        return TE->State == TreeEntry::Vectorize &&
+               TE->ReorderIndices.empty() && TE->ReuseShuffleIndices.empty() &&
+               TE->getOpcode() == Instruction::FMul && !TE->isAltShuffle() &&
+               all_of(TE->Scalars, [&](Value *V) {
+                 return (TE->hasCopyableElements() &&
+                         TE->isCopyableElement(V)) ||
+                        V->hasOneUse();
+               });
+      };
+      if (!IsOneUseVectorFMulOperand(LHS) &&
+          (E.getOpcode() == Instruction::Sub ||
+           !IsOneUseVectorFMulOperand(RHS)))
+        break;
       if (!canConvertToFMA(E.Scalars, E.getOperations(), *DT, *DL, *TTI, *TLI)
                .isValid())
         break;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/fma-conversion-multi-use-guard.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/fma-conversion-multi-use-guard.ll
index 624c24a99859f..2acbebf259ee9 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/fma-conversion-multi-use-guard.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/fma-conversion-multi-use-guard.ll
@@ -47,20 +47,12 @@ entry:
 define void @mixed_use_v4(ptr %src0, ptr %src1, ptr %dst, ptr %extra) {
 ; CHECK-LABEL: @mixed_use_v4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP_S0_2:%.*]] = getelementptr inbounds float, ptr [[SRC0:%.*]], i64 2
-; CHECK-NEXT:    [[GEP_S1_2:%.*]] = getelementptr inbounds float, ptr [[SRC1:%.*]], i64 2
-; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[SRC0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[SRC1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <2 x float> [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <2 x float> [[TMP2]], splat (float 1.000000e+00)
-; CHECK-NEXT:    [[GEP_D_2:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 2
-; CHECK-NEXT:    [[TMP8:%.*]] = load <2 x float>, ptr [[GEP_S0_2]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x float>, ptr [[GEP_S1_2]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <2 x float> [[TMP8]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = fadd fast <2 x float> [[TMP6]], splat (float 1.000000e+00)
-; CHECK-NEXT:    store <2 x float> [[TMP3]], ptr [[DST]], align 4
-; CHECK-NEXT:    store <2 x float> [[TMP7]], ptr [[GEP_D_2]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[SRC0:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[SRC1:%.*]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], splat (float 1.000000e+00)
+; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[DST:%.*]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
 ; CHECK-NEXT:    store float [[TMP4]], ptr [[EXTRA:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;

>From 5f23024fbdb6f6a82db54aafdfc5f1b65a32473f Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Tue, 31 Mar 2026 08:45:09 -0700
Subject: [PATCH 2/2] Address comment

Created using spr 1.3.7
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b3fbfc5b3ee93..fb84cdc44d374 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14364,7 +14364,7 @@ void BoUpSLP::transformNodes() {
                });
       };
       if (!IsOneUseVectorFMulOperand(LHS) &&
-          (E.getOpcode() == Instruction::Sub ||
+          (E.getOpcode() == Instruction::FSub ||
            !IsOneUseVectorFMulOperand(RHS)))
         break;
       if (!canConvertToFMA(E.Scalars, E.getOperations(), *DT, *DL, *TTI, *TLI)



More information about the llvm-commits mailing list