[llvm] [InstCombine] Pull unary shuffles through fneg/fabs (PR #144933)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 19 10:52:39 PDT 2025
https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/144933
This canonicalizes fneg/fabs (shuffle X, poison, mask) -> shuffle (fneg/fabs X), poison, mask
This undoes part of b331a7ebc1e02f9939d1a4a1509e7eb6cdda3d38, but keeps the binary shuffle case i.e. shuffle fneg, fneg, mask.
By pulling out the shuffle we bring it in line with the same canonicalisation we perform on binary ops and intrinsics, which the original commit acknowledges goes in the opposite direction.
However I think nowadays VectorCombine is more powerful and can do more optimisations when the shuffle is pulled out, so I think we should revisit this. In particular we get more shuffles folded and can perform scalarization.
On AArch64 -march=armv9 -O3 we get improvements in llvm-test-suite:
```diff
--- build.armv9-O3-a/MultiSource/Benchmarks/VersaBench/beamformer/CMakeFiles/beamformer.dir/beamformer.s
+++ build.armv9-O3-b/MultiSource/Benchmarks/VersaBench/beamformer/CMakeFiles/beamformer.dir/beamformer.s
@@ -2025,11 +2025,9 @@
fcvtn2 v3.4s, v2.2d
add x9, x23, x8
fadd.4s v2, v3, v5
- zip2.4s v4, v3, v2
- zip1.4s v2, v3, v2
- fneg.4s v2, v2
- fneg.4s v3, v4
- stp q2, q3, [x9]
+ fneg.4s v3, v3
+ fneg.4s v4, v2
+ st2.4s { v3, v4 }, [x9]
```
And on RISC-V we see more scalarization:
```diff
@@ -113,20 +117,19 @@
neg a1, a3
and t6, t5, a1
add a5, t6, s4
- vsetvli a1, zero, e64, m2, ta, ma
- vfmv.v.f v8, fa4
- vfneg.v v8, v8
- vfmul.vf v8, v8, fa5
+ fneg.d fa4, fa4
+ fmul.d fa4, fa5, fa4
mv a4, t1
mv a1, t6
+ vsetvli s1, zero, e64, m2, ta, ma
.LBB0_8: # %vector.body
# Parent Loop BB0_4 Depth=1
# => This Inner Loop Header: Depth=2
- vl2re64.v v10, (a4)
- vl2re64.v v12, (a2)
+ vl2re64.v v8, (a4)
+ vl2re64.v v10, (a2)
sub a1, a1, a3
- vfmacc.vv v12, v8, v10
- vs2r.v v12, (a2)
+ vfmacc.vf v10, fa4, v8
+ vs2r.v v10, (a2)
```
>From 4bdc3d68c3830f456ee3813d1b6d738f07d260a3 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 19 Jun 2025 17:10:45 +0100
Subject: [PATCH] [InstCombine] Pull unary shuffles through fneg/fabs
This canonicalizes fneg/fabs (shuffle X, poison, mask) -> shuffle (fneg/fabs X), poison, mask
This undoes part of b331a7ebc1e02f9939d1a4a1509e7eb6cdda3d38, but keeps the binary shuffle case i.e. shuffle fneg, fneg, mask.
By pulling out the shuffle we bring it in line with the same canonicalisation we perform on binary ops and intrinsics, which the original commit acknowledges goes in the opposite direction.
However I think nowadays VectorCombine is more powerful and can do more optimisations when the shuffle is pulled out, so I think we should revisit this. In particular we get more shuffles folded and can perform scalarization.
On AArch64 -march=armv9 -O3 we get improvements in llvm-test-suite:
```diff
--- build.armv9-O3-a/MultiSource/Benchmarks/VersaBench/beamformer/CMakeFiles/beamformer.dir/beamformer.s
+++ build.armv9-O3-b/MultiSource/Benchmarks/VersaBench/beamformer/CMakeFiles/beamformer.dir/beamformer.s
@@ -2025,11 +2025,9 @@
fcvtn2 v3.4s, v2.2d
add x9, x23, x8
fadd.4s v2, v3, v5
- zip2.4s v4, v3, v2
- zip1.4s v2, v3, v2
- fneg.4s v2, v2
- fneg.4s v3, v4
- stp q2, q3, [x9]
+ fneg.4s v3, v3
+ fneg.4s v4, v2
+ st2.4s { v3, v4 }, [x9]
```
And on RISC-V we see more scalarization:
```diff
@@ -113,20 +117,19 @@
neg a1, a3
and t6, t5, a1
add a5, t6, s4
- vsetvli a1, zero, e64, m2, ta, ma
- vfmv.v.f v8, fa4
- vfneg.v v8, v8
- vfmul.vf v8, v8, fa5
+ fneg.d fa4, fa4
+ fmul.d fa4, fa5, fa4
mv a4, t1
mv a1, t6
+ vsetvli s1, zero, e64, m2, ta, ma
.LBB0_8: # %vector.body
# Parent Loop BB0_4 Depth=1
# => This Inner Loop Header: Depth=2
- vl2re64.v v10, (a4)
- vl2re64.v v12, (a2)
+ vl2re64.v v8, (a4)
+ vl2re64.v v10, (a2)
sub a1, a1, a3
- vfmacc.vv v12, v8, v10
- vs2r.v v12, (a2)
+ vfmacc.vf v10, fa4, v8
+ vs2r.v v10, (a2)
```
---
.../InstCombine/InstCombineAddSub.cpp | 5 +++
.../InstCombine/InstCombineCalls.cpp | 5 ---
.../InstCombine/InstCombineVectorOps.cpp | 14 -------
.../matrix-multiplication-negation.ll | 7 ++--
.../InstCombine/vec_shuffle-inseltpoison.ll | 6 +--
.../Transforms/InstCombine/vec_shuffle.ll | 42 +++++++++----------
.../X86/alternate-cast-inseltpoison.ll | 20 ++++-----
.../SLPVectorizer/X86/alternate-cast.ll | 20 ++++-----
8 files changed, 53 insertions(+), 66 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 0a3837f2c0ce3..f0bf6b6eceb3a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2999,6 +2999,11 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) {
return replaceInstUsesWith(I, NewCopySign);
}
+ // fneg (shuffle x, Mask) --> shuffle (fneg x), Mask
+ ArrayRef<int> Mask;
+ if (match(OneUse, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))
+ return new ShuffleVectorInst(Builder.CreateFNegFMF(X, &I), Mask);
+
return nullptr;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 03897117861f6..7d0b371591fb2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1404,11 +1404,6 @@ InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
!II->getCalledFunction()->isSpeculatable())
return nullptr;
- // fabs is canonicalized to fabs (shuffle ...) in foldShuffleOfUnaryOps, so
- // avoid undoing it.
- if (match(II, m_FAbs(m_Value())))
- return nullptr;
-
Value *X;
Constant *C;
ArrayRef<int> Mask;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index f946c3856948b..a746a5229fb9a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2533,20 +2533,6 @@ static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
- // Match 1-input (unary) shuffle.
- // shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
- if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Poison())) {
- Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
- if (IsFNeg)
- return UnaryOperator::CreateFNegFMF(NewShuf, S0);
-
- Function *FAbs = Intrinsic::getOrInsertDeclaration(
- Shuf.getModule(), Intrinsic::fabs, Shuf.getType());
- CallInst *NewF = CallInst::Create(FAbs, {NewShuf});
- NewF->setFastMathFlags(S0->getFastMathFlags());
- return NewF;
- }
-
// Match 2-input (binary) shuffle.
auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
Value *Y;
diff --git a/llvm/test/Transforms/InstCombine/matrix-multiplication-negation.ll b/llvm/test/Transforms/InstCombine/matrix-multiplication-negation.ll
index 331de12ddd339..c2663d8638932 100644
--- a/llvm/test/Transforms/InstCombine/matrix-multiplication-negation.ll
+++ b/llvm/test/Transforms/InstCombine/matrix-multiplication-negation.ll
@@ -205,9 +205,10 @@ define <4 x double> @matrix_multiply_two_operands_negated_with_same_size(<2 x do
define <2 x double> @matrix_multiply_two_operands_with_multiple_uses(<6 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @matrix_multiply_two_operands_with_multiple_uses(
-; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <6 x double> [[A]], <6 x double> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: [[RES_3:%.*]] = fsub <2 x double> [[RES]], [[TMP1]]
+; CHECK-NEXT: [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
+; CHECK-NEXT: [[RES_2:%.*]] = shufflevector <6 x double> [[A_NEG]], <6 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[RES_3:%.*]] = fadd <2 x double> [[RES_2]], [[RES]]
; CHECK-NEXT: ret <2 x double> [[RES_3]]
;
%a.neg = fneg <6 x double> %a
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll
index 9aa050e8cd500..0a9c71dba7947 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll
@@ -1278,9 +1278,9 @@ define <2 x float> @fsub_splat_constant1(<2 x float> %x) {
define <2 x float> @fneg(<2 x float> %x) {
; CHECK-LABEL: @fneg(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[R:%.*]] = fneg <2 x float> [[TMP1]]
-; CHECK-NEXT: ret <2 x float> [[R]]
+; CHECK-NEXT: [[R:%.*]] = fneg <2 x float> [[TMP1:%.*]]
+; CHECK-NEXT: [[R1:%.*]] = shufflevector <2 x float> [[R]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x float> [[R1]]
;
%splat = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> zeroinitializer
%r = fsub <2 x float> <float -0.0, float -0.0>, %splat
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index 83919e743d384..003eddf7f121b 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -1382,9 +1382,9 @@ define <2 x float> @fsub_splat_constant1(<2 x float> %x) {
define <2 x float> @fneg(<2 x float> %x) {
; CHECK-LABEL: @fneg(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[R:%.*]] = fneg <2 x float> [[TMP1]]
-; CHECK-NEXT: ret <2 x float> [[R]]
+; CHECK-NEXT: [[R:%.*]] = fneg <2 x float> [[TMP1:%.*]]
+; CHECK-NEXT: [[R1:%.*]] = shufflevector <2 x float> [[R]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x float> [[R1]]
;
%splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
%r = fsub <2 x float> <float -0.0, float -0.0>, %splat
@@ -1906,9 +1906,9 @@ define <4 x i32> @PR46872(<4 x i32> %x) {
define <2 x float> @fabs_unary_shuf(<2 x float> %x) {
; CHECK-LABEL: @fabs_unary_shuf(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[R:%.*]] = call nnan nsz <2 x float> @llvm.fabs.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT: ret <2 x float> [[R]]
+; CHECK-NEXT: [[R:%.*]] = call nnan nsz <2 x float> @llvm.fabs.v2f32(<2 x float> [[TMP1:%.*]])
+; CHECK-NEXT: [[R1:%.*]] = shufflevector <2 x float> [[R]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: ret <2 x float> [[R1]]
;
%nx = call nsz nnan <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
%r = shufflevector <2 x float> %nx, <2 x float> poison, <2 x i32> <i32 1, i32 0>
@@ -1917,9 +1917,9 @@ define <2 x float> @fabs_unary_shuf(<2 x float> %x) {
define <4 x half> @fabs_unary_shuf_widen(<2 x half> %x) {
; CHECK-LABEL: @fabs_unary_shuf_widen(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[X:%.*]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 poison>
-; CHECK-NEXT: [[R:%.*]] = call ninf <4 x half> @llvm.fabs.v4f16(<4 x half> [[TMP1]])
-; CHECK-NEXT: ret <4 x half> [[R]]
+; CHECK-NEXT: [[X:%.*]] = call ninf <2 x half> @llvm.fabs.v2f16(<2 x half> [[X1:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[X]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 poison>
+; CHECK-NEXT: ret <4 x half> [[TMP1]]
;
%nx = call ninf <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%r = shufflevector <2 x half> %nx, <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 poison>
@@ -1928,9 +1928,9 @@ define <4 x half> @fabs_unary_shuf_widen(<2 x half> %x) {
define <2 x double> @fabs_unary_shuf_narrow(<4 x double> %x) {
; CHECK-LABEL: @fabs_unary_shuf_narrow(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[X:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 0>
-; CHECK-NEXT: [[R:%.*]] = call nsz <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP1]])
-; CHECK-NEXT: ret <2 x double> [[R]]
+; CHECK-NEXT: [[X:%.*]] = call nsz <4 x double> @llvm.fabs.v4f64(<4 x double> [[X1:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[X]], <4 x double> poison, <2 x i32> <i32 3, i32 0>
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%nx = call nsz <4 x double> @llvm.fabs.v4f64(<4 x double> %x)
%r = shufflevector <4 x double> %nx, <4 x double> poison, <2 x i32> <i32 3, i32 0>
@@ -2021,9 +2021,9 @@ define <2 x float> @fabs_shuf_use3(<2 x float> %x, <2 x float> %y) {
define <2 x float> @fneg_unary_shuf(<2 x float> %x) {
; CHECK-LABEL: @fneg_unary_shuf(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[R:%.*]] = fneg nnan nsz <2 x float> [[TMP1]]
-; CHECK-NEXT: ret <2 x float> [[R]]
+; CHECK-NEXT: [[R:%.*]] = fneg nnan nsz <2 x float> [[TMP1:%.*]]
+; CHECK-NEXT: [[R1:%.*]] = shufflevector <2 x float> [[R]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: ret <2 x float> [[R1]]
;
%nx = fneg nsz nnan <2 x float> %x
%r = shufflevector <2 x float> %nx, <2 x float> poison, <2 x i32> <i32 1, i32 0>
@@ -2032,9 +2032,9 @@ define <2 x float> @fneg_unary_shuf(<2 x float> %x) {
define <4 x half> @fneg_unary_shuf_widen(<2 x half> %x) {
; CHECK-LABEL: @fneg_unary_shuf_widen(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[X:%.*]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 poison>
-; CHECK-NEXT: [[R:%.*]] = fneg ninf <4 x half> [[TMP1]]
-; CHECK-NEXT: ret <4 x half> [[R]]
+; CHECK-NEXT: [[X:%.*]] = fneg ninf <2 x half> [[X1:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[X]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 poison>
+; CHECK-NEXT: ret <4 x half> [[TMP1]]
;
%nx = fneg ninf <2 x half> %x
%r = shufflevector <2 x half> %nx, <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 poison>
@@ -2043,9 +2043,9 @@ define <4 x half> @fneg_unary_shuf_widen(<2 x half> %x) {
define <2 x double> @fneg_unary_shuf_narrow(<4 x double> %x) {
; CHECK-LABEL: @fneg_unary_shuf_narrow(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[X:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 0>
-; CHECK-NEXT: [[R:%.*]] = fneg nsz <2 x double> [[TMP1]]
-; CHECK-NEXT: ret <2 x double> [[R]]
+; CHECK-NEXT: [[X:%.*]] = fneg nsz <4 x double> [[X1:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[X]], <4 x double> poison, <2 x i32> <i32 3, i32 0>
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%nx = fneg nsz <4 x double> %x
%r = shufflevector <4 x double> %nx, <4 x double> poison, <2 x i32> <i32 3, i32 0>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll
index 3e2c305dbed65..6c73a9fdce851 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll
@@ -132,19 +132,19 @@ define <8 x i32> @fptosi_fptoui(<8 x float> %a) {
define <8 x float> @fneg_fabs(<8 x float> %a) {
; SSE2-LABEL: @fneg_fabs(
-; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; SSE2-NEXT: [[TMP3:%.*]] = fneg <4 x float> [[TMP1]]
-; SSE2-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP2]])
-; SSE2-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE2-NEXT: [[A:%.*]] = fneg <8 x float> [[A1:%.*]]
+; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE2-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[A1]])
+; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; SSE2-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: ret <8 x float> [[DOTUNCASTED]]
;
; SLM-LABEL: @fneg_fabs(
-; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; SLM-NEXT: [[TMP3:%.*]] = fneg <4 x float> [[TMP1]]
-; SLM-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP2]])
-; SLM-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SLM-NEXT: [[A:%.*]] = fneg <8 x float> [[A1:%.*]]
+; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SLM-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[A1]])
+; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; SLM-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x float> [[DOTUNCASTED]]
;
; AVX-LABEL: @fneg_fabs(
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll
index 880523d6474ac..225843613165a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll
@@ -132,19 +132,19 @@ define <8 x i32> @fptosi_fptoui(<8 x float> %a) {
define <8 x float> @fneg_fabs(<8 x float> %a) {
; SSE2-LABEL: @fneg_fabs(
-; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; SSE2-NEXT: [[TMP3:%.*]] = fneg <4 x float> [[TMP1]]
-; SSE2-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP2]])
-; SSE2-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE2-NEXT: [[A:%.*]] = fneg <8 x float> [[A1:%.*]]
+; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE2-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[A1]])
+; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; SSE2-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: ret <8 x float> [[DOTUNCASTED]]
;
; SLM-LABEL: @fneg_fabs(
-; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; SLM-NEXT: [[TMP3:%.*]] = fneg <4 x float> [[TMP1]]
-; SLM-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP2]])
-; SLM-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SLM-NEXT: [[A:%.*]] = fneg <8 x float> [[A1:%.*]]
+; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SLM-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[A1]])
+; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; SLM-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x float> [[DOTUNCASTED]]
;
; AVX-LABEL: @fneg_fabs(
More information about the llvm-commits
mailing list