[llvm] [InstCombine] Pull unary shuffles through fneg/fabs (PR #144933)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 19 10:53:13 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Luke Lau (lukel97)
<details>
<summary>Changes</summary>
This canonicalizes fneg/fabs (shuffle X, poison, mask) -> shuffle (fneg/fabs X), poison, mask.
This undoes part of b331a7ebc1e02f9939d1a4a1509e7eb6cdda3d38, but keeps the binary shuffle case i.e. shuffle fneg, fneg, mask.
By pulling out the shuffle we bring it in line with the canonicalisation we already perform on binary ops and intrinsics; the original commit acknowledges that it goes in the opposite direction.
However, I think VectorCombine is nowadays more powerful and can do more optimisations when the shuffle is pulled out, so we should revisit this. In particular, we get more shuffles folded and can perform scalarization.
On AArch64 -march=armv9 -O3 we get improvements in llvm-test-suite:
```diff
--- build.armv9-O3-a/MultiSource/Benchmarks/VersaBench/beamformer/CMakeFiles/beamformer.dir/beamformer.s
+++ build.armv9-O3-b/MultiSource/Benchmarks/VersaBench/beamformer/CMakeFiles/beamformer.dir/beamformer.s
@@ -2025,11 +2025,9 @@
fcvtn2 v3.4s, v2.2d
add x9, x23, x8
fadd.4s v2, v3, v5
- zip2.4s v4, v3, v2
- zip1.4s v2, v3, v2
- fneg.4s v2, v2
- fneg.4s v3, v4
- stp q2, q3, [x9]
+ fneg.4s v3, v3
+ fneg.4s v4, v2
+ st2.4s { v3, v4 }, [x9]
```
And on RISC-V we see more scalarization:
```diff
@@ -113,20 +117,19 @@
neg a1, a3
and t6, t5, a1
add a5, t6, s4
- vsetvli a1, zero, e64, m2, ta, ma
- vfmv.v.f v8, fa4
- vfneg.v v8, v8
- vfmul.vf v8, v8, fa5
+ fneg.d fa4, fa4
+ fmul.d fa4, fa5, fa4
mv a4, t1
mv a1, t6
+ vsetvli s1, zero, e64, m2, ta, ma
.LBB0_8: # %vector.body
# Parent Loop BB0_4 Depth=1
# => This Inner Loop Header: Depth=2
- vl2re64.v v10, (a4)
- vl2re64.v v12, (a2)
+ vl2re64.v v8, (a4)
+ vl2re64.v v10, (a2)
sub a1, a1, a3
- vfmacc.vv v12, v8, v10
- vs2r.v v12, (a2)
+ vfmacc.vf v10, fa4, v8
+ vs2r.v v10, (a2)
```
---
Full diff: https://github.com/llvm/llvm-project/pull/144933.diff
8 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp (+5)
- (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (-5)
- (modified) llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp (-14)
- (modified) llvm/test/Transforms/InstCombine/matrix-multiplication-negation.ll (+4-3)
- (modified) llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll (+3-3)
- (modified) llvm/test/Transforms/InstCombine/vec_shuffle.ll (+21-21)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll (+10-10)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll (+10-10)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 0a3837f2c0ce3..f0bf6b6eceb3a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2999,6 +2999,11 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) {
return replaceInstUsesWith(I, NewCopySign);
}
+ // fneg (shuffle x, Mask) --> shuffle (fneg x), Mask
+ ArrayRef<int> Mask;
+ if (match(OneUse, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))
+ return new ShuffleVectorInst(Builder.CreateFNegFMF(X, &I), Mask);
+
return nullptr;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 03897117861f6..7d0b371591fb2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1404,11 +1404,6 @@ InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
!II->getCalledFunction()->isSpeculatable())
return nullptr;
- // fabs is canonicalized to fabs (shuffle ...) in foldShuffleOfUnaryOps, so
- // avoid undoing it.
- if (match(II, m_FAbs(m_Value())))
- return nullptr;
-
Value *X;
Constant *C;
ArrayRef<int> Mask;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index f946c3856948b..a746a5229fb9a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2533,20 +2533,6 @@ static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
- // Match 1-input (unary) shuffle.
- // shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
- if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Poison())) {
- Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
- if (IsFNeg)
- return UnaryOperator::CreateFNegFMF(NewShuf, S0);
-
- Function *FAbs = Intrinsic::getOrInsertDeclaration(
- Shuf.getModule(), Intrinsic::fabs, Shuf.getType());
- CallInst *NewF = CallInst::Create(FAbs, {NewShuf});
- NewF->setFastMathFlags(S0->getFastMathFlags());
- return NewF;
- }
-
// Match 2-input (binary) shuffle.
auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
Value *Y;
diff --git a/llvm/test/Transforms/InstCombine/matrix-multiplication-negation.ll b/llvm/test/Transforms/InstCombine/matrix-multiplication-negation.ll
index 331de12ddd339..c2663d8638932 100644
--- a/llvm/test/Transforms/InstCombine/matrix-multiplication-negation.ll
+++ b/llvm/test/Transforms/InstCombine/matrix-multiplication-negation.ll
@@ -205,9 +205,10 @@ define <4 x double> @matrix_multiply_two_operands_negated_with_same_size(<2 x do
define <2 x double> @matrix_multiply_two_operands_with_multiple_uses(<6 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @matrix_multiply_two_operands_with_multiple_uses(
-; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <6 x double> [[A]], <6 x double> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: [[RES_3:%.*]] = fsub <2 x double> [[RES]], [[TMP1]]
+; CHECK-NEXT: [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
+; CHECK-NEXT: [[RES_2:%.*]] = shufflevector <6 x double> [[A_NEG]], <6 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[RES_3:%.*]] = fadd <2 x double> [[RES_2]], [[RES]]
; CHECK-NEXT: ret <2 x double> [[RES_3]]
;
%a.neg = fneg <6 x double> %a
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll
index 9aa050e8cd500..0a9c71dba7947 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll
@@ -1278,9 +1278,9 @@ define <2 x float> @fsub_splat_constant1(<2 x float> %x) {
define <2 x float> @fneg(<2 x float> %x) {
; CHECK-LABEL: @fneg(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[R:%.*]] = fneg <2 x float> [[TMP1]]
-; CHECK-NEXT: ret <2 x float> [[R]]
+; CHECK-NEXT: [[R:%.*]] = fneg <2 x float> [[TMP1:%.*]]
+; CHECK-NEXT: [[R1:%.*]] = shufflevector <2 x float> [[R]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x float> [[R1]]
;
%splat = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> zeroinitializer
%r = fsub <2 x float> <float -0.0, float -0.0>, %splat
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index 83919e743d384..003eddf7f121b 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -1382,9 +1382,9 @@ define <2 x float> @fsub_splat_constant1(<2 x float> %x) {
define <2 x float> @fneg(<2 x float> %x) {
; CHECK-LABEL: @fneg(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[R:%.*]] = fneg <2 x float> [[TMP1]]
-; CHECK-NEXT: ret <2 x float> [[R]]
+; CHECK-NEXT: [[R:%.*]] = fneg <2 x float> [[TMP1:%.*]]
+; CHECK-NEXT: [[R1:%.*]] = shufflevector <2 x float> [[R]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x float> [[R1]]
;
%splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
%r = fsub <2 x float> <float -0.0, float -0.0>, %splat
@@ -1906,9 +1906,9 @@ define <4 x i32> @PR46872(<4 x i32> %x) {
define <2 x float> @fabs_unary_shuf(<2 x float> %x) {
; CHECK-LABEL: @fabs_unary_shuf(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[R:%.*]] = call nnan nsz <2 x float> @llvm.fabs.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT: ret <2 x float> [[R]]
+; CHECK-NEXT: [[R:%.*]] = call nnan nsz <2 x float> @llvm.fabs.v2f32(<2 x float> [[TMP1:%.*]])
+; CHECK-NEXT: [[R1:%.*]] = shufflevector <2 x float> [[R]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: ret <2 x float> [[R1]]
;
%nx = call nsz nnan <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
%r = shufflevector <2 x float> %nx, <2 x float> poison, <2 x i32> <i32 1, i32 0>
@@ -1917,9 +1917,9 @@ define <2 x float> @fabs_unary_shuf(<2 x float> %x) {
define <4 x half> @fabs_unary_shuf_widen(<2 x half> %x) {
; CHECK-LABEL: @fabs_unary_shuf_widen(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[X:%.*]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 poison>
-; CHECK-NEXT: [[R:%.*]] = call ninf <4 x half> @llvm.fabs.v4f16(<4 x half> [[TMP1]])
-; CHECK-NEXT: ret <4 x half> [[R]]
+; CHECK-NEXT: [[X:%.*]] = call ninf <2 x half> @llvm.fabs.v2f16(<2 x half> [[X1:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[X]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 poison>
+; CHECK-NEXT: ret <4 x half> [[TMP1]]
;
%nx = call ninf <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%r = shufflevector <2 x half> %nx, <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 poison>
@@ -1928,9 +1928,9 @@ define <4 x half> @fabs_unary_shuf_widen(<2 x half> %x) {
define <2 x double> @fabs_unary_shuf_narrow(<4 x double> %x) {
; CHECK-LABEL: @fabs_unary_shuf_narrow(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[X:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 0>
-; CHECK-NEXT: [[R:%.*]] = call nsz <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP1]])
-; CHECK-NEXT: ret <2 x double> [[R]]
+; CHECK-NEXT: [[X:%.*]] = call nsz <4 x double> @llvm.fabs.v4f64(<4 x double> [[X1:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[X]], <4 x double> poison, <2 x i32> <i32 3, i32 0>
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%nx = call nsz <4 x double> @llvm.fabs.v4f64(<4 x double> %x)
%r = shufflevector <4 x double> %nx, <4 x double> poison, <2 x i32> <i32 3, i32 0>
@@ -2021,9 +2021,9 @@ define <2 x float> @fabs_shuf_use3(<2 x float> %x, <2 x float> %y) {
define <2 x float> @fneg_unary_shuf(<2 x float> %x) {
; CHECK-LABEL: @fneg_unary_shuf(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[R:%.*]] = fneg nnan nsz <2 x float> [[TMP1]]
-; CHECK-NEXT: ret <2 x float> [[R]]
+; CHECK-NEXT: [[R:%.*]] = fneg nnan nsz <2 x float> [[TMP1:%.*]]
+; CHECK-NEXT: [[R1:%.*]] = shufflevector <2 x float> [[R]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: ret <2 x float> [[R1]]
;
%nx = fneg nsz nnan <2 x float> %x
%r = shufflevector <2 x float> %nx, <2 x float> poison, <2 x i32> <i32 1, i32 0>
@@ -2032,9 +2032,9 @@ define <2 x float> @fneg_unary_shuf(<2 x float> %x) {
define <4 x half> @fneg_unary_shuf_widen(<2 x half> %x) {
; CHECK-LABEL: @fneg_unary_shuf_widen(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[X:%.*]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 poison>
-; CHECK-NEXT: [[R:%.*]] = fneg ninf <4 x half> [[TMP1]]
-; CHECK-NEXT: ret <4 x half> [[R]]
+; CHECK-NEXT: [[X:%.*]] = fneg ninf <2 x half> [[X1:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[X]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 poison>
+; CHECK-NEXT: ret <4 x half> [[TMP1]]
;
%nx = fneg ninf <2 x half> %x
%r = shufflevector <2 x half> %nx, <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 poison>
@@ -2043,9 +2043,9 @@ define <4 x half> @fneg_unary_shuf_widen(<2 x half> %x) {
define <2 x double> @fneg_unary_shuf_narrow(<4 x double> %x) {
; CHECK-LABEL: @fneg_unary_shuf_narrow(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[X:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 0>
-; CHECK-NEXT: [[R:%.*]] = fneg nsz <2 x double> [[TMP1]]
-; CHECK-NEXT: ret <2 x double> [[R]]
+; CHECK-NEXT: [[X:%.*]] = fneg nsz <4 x double> [[X1:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[X]], <4 x double> poison, <2 x i32> <i32 3, i32 0>
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%nx = fneg nsz <4 x double> %x
%r = shufflevector <4 x double> %nx, <4 x double> poison, <2 x i32> <i32 3, i32 0>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll
index 3e2c305dbed65..6c73a9fdce851 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll
@@ -132,19 +132,19 @@ define <8 x i32> @fptosi_fptoui(<8 x float> %a) {
define <8 x float> @fneg_fabs(<8 x float> %a) {
; SSE2-LABEL: @fneg_fabs(
-; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; SSE2-NEXT: [[TMP3:%.*]] = fneg <4 x float> [[TMP1]]
-; SSE2-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP2]])
-; SSE2-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE2-NEXT: [[A:%.*]] = fneg <8 x float> [[A1:%.*]]
+; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE2-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[A1]])
+; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; SSE2-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: ret <8 x float> [[DOTUNCASTED]]
;
; SLM-LABEL: @fneg_fabs(
-; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; SLM-NEXT: [[TMP3:%.*]] = fneg <4 x float> [[TMP1]]
-; SLM-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP2]])
-; SLM-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SLM-NEXT: [[A:%.*]] = fneg <8 x float> [[A1:%.*]]
+; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SLM-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[A1]])
+; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; SLM-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x float> [[DOTUNCASTED]]
;
; AVX-LABEL: @fneg_fabs(
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll
index 880523d6474ac..225843613165a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll
@@ -132,19 +132,19 @@ define <8 x i32> @fptosi_fptoui(<8 x float> %a) {
define <8 x float> @fneg_fabs(<8 x float> %a) {
; SSE2-LABEL: @fneg_fabs(
-; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; SSE2-NEXT: [[TMP3:%.*]] = fneg <4 x float> [[TMP1]]
-; SSE2-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP2]])
-; SSE2-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE2-NEXT: [[A:%.*]] = fneg <8 x float> [[A1:%.*]]
+; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE2-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[A1]])
+; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; SSE2-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: ret <8 x float> [[DOTUNCASTED]]
;
; SLM-LABEL: @fneg_fabs(
-; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; SLM-NEXT: [[TMP3:%.*]] = fneg <4 x float> [[TMP1]]
-; SLM-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP2]])
-; SLM-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SLM-NEXT: [[A:%.*]] = fneg <8 x float> [[A1:%.*]]
+; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SLM-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[A1]])
+; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; SLM-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x float> [[DOTUNCASTED]]
;
; AVX-LABEL: @fneg_fabs(
``````````
</details>
https://github.com/llvm/llvm-project/pull/144933
More information about the llvm-commits
mailing list