[llvm] [IR] Add new CreateVectorInterleave interface and constant fold (PR #150931)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 28 08:16:35 PDT 2025
https://github.com/david-arm updated https://github.com/llvm/llvm-project/pull/150931
From 5cbdfe8292e2639246438a0805e33b8a5621b940 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Mon, 28 Jul 2025 12:18:13 +0000
Subject: [PATCH 1/2] [IR] Add new CreateVectorInterleave interface and
constant fold
This PR adds a new interface to IRBuilder called
CreateVectorInterleave, which can be used to create
llvm.vector.interleaveN intrinsics for factors 2-8. I've also
added a new interface to the Folder called FoldVectorInterleave,
which detects when every operand is the same constant splat and
returns a single wider splat of the appropriate size instead.

For convenience I have also moved getInterleaveIntrinsicID
and getDeinterleaveIntrinsicID from VectorUtils.cpp to
Intrinsics.cpp, where they can be used by IRBuilder.
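
As a rough illustration of the intended usage (not part of the
patch; the value names and surrounding builder setup are
hypothetical), a caller can now write:

  // Minimal sketch, assuming an IRBuilder<> named Builder positioned at
  // the desired insertion point and two vector values A and B of the
  // same vector type. The builder selects the llvm.vector.interleaveN
  // intrinsic matching the operand count and computes the widened
  // result type itself.
  Value *Interleaved = Builder.CreateVectorInterleave({A, B}, "interleaved");

  // The relocated helper can also be queried directly, e.g. to pick the
  // factor-4 intrinsic ID:
  Intrinsic::ID IID = Intrinsic::getInterleaveIntrinsicID(4);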
---
.../llvm/Analysis/InstSimplifyFolder.h | 4 ++
llvm/include/llvm/Analysis/TargetFolder.h | 18 ++++++
llvm/include/llvm/Analysis/VectorUtils.h | 6 --
llvm/include/llvm/IR/ConstantFolder.h | 18 ++++++
llvm/include/llvm/IR/IRBuilder.h | 2 +
llvm/include/llvm/IR/IRBuilderFolder.h | 2 +
llvm/include/llvm/IR/Intrinsics.h | 11 +++-
llvm/include/llvm/IR/NoFolder.h | 4 ++
llvm/lib/Analysis/ConstantFolding.cpp | 12 ++++
llvm/lib/Analysis/VectorUtils.cpp | 24 --------
.../lib/CodeGen/ComplexDeinterleavingPass.cpp | 17 +++---
llvm/lib/IR/IRBuilder.cpp | 33 ++++++++++-
llvm/lib/IR/Intrinsics.cpp | 24 ++++++++
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 11 +---
.../complex-deinterleaving-opt-crash.ll | 6 +-
...rleaving-reductions-predicated-scalable.ll | 55 +++++++++----------
...plex-deinterleaving-reductions-scalable.ll | 52 +++++++++---------
17 files changed, 186 insertions(+), 113 deletions(-)
diff --git a/llvm/include/llvm/Analysis/InstSimplifyFolder.h b/llvm/include/llvm/Analysis/InstSimplifyFolder.h
index 58793ed977f68..b7e3decb91d61 100644
--- a/llvm/include/llvm/Analysis/InstSimplifyFolder.h
+++ b/llvm/include/llvm/Analysis/InstSimplifyFolder.h
@@ -125,6 +125,10 @@ class LLVM_ABI InstSimplifyFolder final : public IRBuilderFolder {
dyn_cast_if_present<CallBase>(FMFSource));
}
+ Value *FoldVectorInterleave(ArrayRef<Value *> Ops) const override {
+ return ConstFolder.FoldVectorInterleave(Ops);
+ }
+
//===--------------------------------------------------------------------===//
// Cast/Conversion Operators
//===--------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Analysis/TargetFolder.h b/llvm/include/llvm/Analysis/TargetFolder.h
index d27455cf3505d..151ff93975ae6 100644
--- a/llvm/include/llvm/Analysis/TargetFolder.h
+++ b/llvm/include/llvm/Analysis/TargetFolder.h
@@ -189,6 +189,24 @@ class LLVM_ABI TargetFolder final : public IRBuilderFolder {
return nullptr;
}
+ Value *FoldVectorInterleave(ArrayRef<Value *> Ops) const override {
+ // Check to see if all operands are the same.
+ for (unsigned I = 1; I < Ops.size(); I++) {
+ if (Ops[I] != Ops[0])
+ return nullptr;
+ }
+
+ // Is this just a large splat?
+ if (auto *C = dyn_cast<Constant>(Ops[0])) {
+ if (auto *V = C->getSplatValue()) {
+ auto *SubvecTy = cast<VectorType>(Ops[0]->getType());
+ return ConstantVector::getSplat(
+ SubvecTy->getElementCount() * Ops.size(), V);
+ }
+ }
+ return nullptr;
+ }
+
Value *FoldBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Type *Ty,
Instruction *FMFSource) const override {
auto *C1 = dyn_cast<Constant>(LHS);
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index 9a2773c06bae6..b55c4e0a6bf76 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -177,12 +177,6 @@ LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(
LLVM_ABI Intrinsic::ID
getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI);
-/// Returns the corresponding llvm.vector.interleaveN intrinsic for factor N.
-LLVM_ABI Intrinsic::ID getInterleaveIntrinsicID(unsigned Factor);
-
-/// Returns the corresponding llvm.vector.deinterleaveN intrinsic for factor N.
-LLVM_ABI Intrinsic::ID getDeinterleaveIntrinsicID(unsigned Factor);
-
/// Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID);
diff --git a/llvm/include/llvm/IR/ConstantFolder.h b/llvm/include/llvm/IR/ConstantFolder.h
index 26b7242abc4d9..578f44aabf0e9 100644
--- a/llvm/include/llvm/IR/ConstantFolder.h
+++ b/llvm/include/llvm/IR/ConstantFolder.h
@@ -181,6 +181,24 @@ class LLVM_ABI ConstantFolder final : public IRBuilderFolder {
return nullptr;
}
+ Value *FoldVectorInterleave(ArrayRef<Value *> Ops) const override {
+ // Check to see if all operands are the same.
+ for (unsigned I = 1; I < Ops.size(); I++) {
+ if (Ops[I] != Ops[0])
+ return nullptr;
+ }
+
+ // Is this just a large splat?
+ if (auto *C = dyn_cast<Constant>(Ops[0])) {
+ if (auto *V = C->getSplatValue()) {
+ auto *SubvecTy = cast<VectorType>(Ops[0]->getType());
+ return ConstantVector::getSplat(
+ SubvecTy->getElementCount() * Ops.size(), V);
+ }
+ }
+ return nullptr;
+ }
+
Value *FoldBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Type *Ty,
Instruction *FMFSource) const override {
// Use TargetFolder or InstSimplifyFolder instead.
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index 7c600e762a451..6d3d864b46559 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -2614,6 +2614,8 @@ class IRBuilderBase {
return CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask, Name);
}
+ Value *CreateVectorInterleave(ArrayRef<Value *> Ops, const Twine &Name = "");
+
Value *CreateExtractValue(Value *Agg, ArrayRef<unsigned> Idxs,
const Twine &Name = "") {
if (auto *V = Folder.FoldExtractValue(Agg, Idxs))
diff --git a/llvm/include/llvm/IR/IRBuilderFolder.h b/llvm/include/llvm/IR/IRBuilderFolder.h
index db4ab5af2433a..24416712c3d82 100644
--- a/llvm/include/llvm/IR/IRBuilderFolder.h
+++ b/llvm/include/llvm/IR/IRBuilderFolder.h
@@ -75,6 +75,8 @@ class LLVM_ABI IRBuilderFolder {
virtual Value *FoldCast(Instruction::CastOps Op, Value *V,
Type *DestTy) const = 0;
+ virtual Value *FoldVectorInterleave(ArrayRef<Value *> Ops) const = 0;
+
virtual Value *
FoldBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Type *Ty,
Instruction *FMFSource = nullptr) const = 0;
diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
index 156805293367b..48735b06d3f53 100644
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -283,8 +283,15 @@ namespace Intrinsic {
// or of the wrong kind will be renamed by adding ".renamed" to the name.
LLVM_ABI std::optional<Function *> remangleIntrinsicFunction(Function *F);
-} // End Intrinsic namespace
+ /// Returns the corresponding llvm.vector.interleaveN intrinsic for factor N.
+ LLVM_ABI Intrinsic::ID getInterleaveIntrinsicID(unsigned Factor);
-} // End llvm namespace
+ /// Returns the corresponding llvm.vector.deinterleaveN intrinsic for factor
+ /// N.
+ LLVM_ABI Intrinsic::ID getDeinterleaveIntrinsicID(unsigned Factor);
+
+ } // namespace Intrinsic
+
+ } // namespace llvm
#endif
diff --git a/llvm/include/llvm/IR/NoFolder.h b/llvm/include/llvm/IR/NoFolder.h
index 9f16c6983313a..cb843c8f7a19e 100644
--- a/llvm/include/llvm/IR/NoFolder.h
+++ b/llvm/include/llvm/IR/NoFolder.h
@@ -118,6 +118,10 @@ class LLVM_ABI NoFolder final : public IRBuilderFolder {
return nullptr;
}
+ Value *FoldVectorInterleave(ArrayRef<Value *> Ops) const override {
+ return nullptr;
+ }
+
//===--------------------------------------------------------------------===//
// Cast/Conversion Operators
//===--------------------------------------------------------------------===//
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 759c553111d06..ea4c49db92818 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1641,7 +1641,19 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::vector_extract:
case Intrinsic::vector_insert:
case Intrinsic::vector_interleave2:
+ case Intrinsic::vector_interleave3:
+ case Intrinsic::vector_interleave4:
+ case Intrinsic::vector_interleave5:
+ case Intrinsic::vector_interleave6:
+ case Intrinsic::vector_interleave7:
+ case Intrinsic::vector_interleave8:
case Intrinsic::vector_deinterleave2:
+ case Intrinsic::vector_deinterleave3:
+ case Intrinsic::vector_deinterleave4:
+ case Intrinsic::vector_deinterleave5:
+ case Intrinsic::vector_deinterleave6:
+ case Intrinsic::vector_deinterleave7:
+ case Intrinsic::vector_deinterleave8:
// Target intrinsics
case Intrinsic::amdgcn_perm:
case Intrinsic::amdgcn_wave_reduce_umin:
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 1b3da590cff7f..150ddced03b15 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -240,30 +240,6 @@ Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
return Intrinsic::not_intrinsic;
}
-struct InterleaveIntrinsic {
- Intrinsic::ID Interleave, Deinterleave;
-};
-
-static InterleaveIntrinsic InterleaveIntrinsics[] = {
- {Intrinsic::vector_interleave2, Intrinsic::vector_deinterleave2},
- {Intrinsic::vector_interleave3, Intrinsic::vector_deinterleave3},
- {Intrinsic::vector_interleave4, Intrinsic::vector_deinterleave4},
- {Intrinsic::vector_interleave5, Intrinsic::vector_deinterleave5},
- {Intrinsic::vector_interleave6, Intrinsic::vector_deinterleave6},
- {Intrinsic::vector_interleave7, Intrinsic::vector_deinterleave7},
- {Intrinsic::vector_interleave8, Intrinsic::vector_deinterleave8},
-};
-
-Intrinsic::ID llvm::getInterleaveIntrinsicID(unsigned Factor) {
- assert(Factor >= 2 && Factor <= 8 && "Unexpected factor");
- return InterleaveIntrinsics[Factor - 2].Interleave;
-}
-
-Intrinsic::ID llvm::getDeinterleaveIntrinsicID(unsigned Factor) {
- assert(Factor >= 2 && Factor <= 8 && "Unexpected factor");
- return InterleaveIntrinsics[Factor - 2].Deinterleave;
-}
-
unsigned llvm::getInterleaveIntrinsicFactor(Intrinsic::ID ID) {
switch (ID) {
case Intrinsic::vector_interleave2:
diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
index 8855740f0cc8f..0cf1a5b390ae4 100644
--- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
+++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -2194,11 +2194,10 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
// Splats that are not constant are interleaved where they are located
Instruction *InsertPoint = (I->comesBefore(R) ? R : I)->getNextNode();
IRBuilder<> IRB(InsertPoint);
- ReplacementNode = IRB.CreateIntrinsic(Intrinsic::vector_interleave2,
- NewTy, {Node->Real, Node->Imag});
+ ReplacementNode = IRB.CreateVectorInterleave({Node->Real, Node->Imag});
} else {
- ReplacementNode = Builder.CreateIntrinsic(
- Intrinsic::vector_interleave2, NewTy, {Node->Real, Node->Imag});
+ ReplacementNode =
+ Builder.CreateVectorInterleave({Node->Real, Node->Imag});
}
break;
}
@@ -2228,8 +2227,7 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
auto *B = replaceNode(Builder, Node->Operands[1]);
auto *NewMaskTy = VectorType::getDoubleElementsVectorType(
cast<VectorType>(MaskReal->getType()));
- auto *NewMask = Builder.CreateIntrinsic(Intrinsic::vector_interleave2,
- NewMaskTy, {MaskReal, MaskImag});
+ auto *NewMask = Builder.CreateVectorInterleave({MaskReal, MaskImag});
ReplacementNode = Builder.CreateSelect(NewMask, A, B);
break;
}
@@ -2260,8 +2258,8 @@ void ComplexDeinterleavingGraph::processReductionSingle(
}
if (!NewInit)
- NewInit = Builder.CreateIntrinsic(Intrinsic::vector_interleave2, NewVTy,
- {Init, Constant::getNullValue(VTy)});
+ NewInit =
+ Builder.CreateVectorInterleave({Init, Constant::getNullValue(VTy)});
NewPHI->addIncoming(NewInit, Incoming);
NewPHI->addIncoming(OperationReplacement, BackEdge);
@@ -2289,8 +2287,7 @@ void ComplexDeinterleavingGraph::processReductionOperation(
Value *InitImag = OldPHIImag->getIncomingValueForBlock(Incoming);
IRBuilder<> Builder(Incoming->getTerminator());
- auto *NewInit = Builder.CreateIntrinsic(Intrinsic::vector_interleave2, NewVTy,
- {InitReal, InitImag});
+ auto *NewInit = Builder.CreateVectorInterleave({InitReal, InitImag});
NewPHI->addIncoming(NewInit, Incoming);
NewPHI->addIncoming(OperationReplacement, BackEdge);
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index 28037d7ec5616..35f47f23196fa 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -13,6 +13,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -1144,9 +1145,35 @@ Value *IRBuilderBase::CreateVectorSplat(ElementCount EC, Value *V,
return CreateShuffleVector(V, Zeros, Name + ".splat");
}
-Value *IRBuilderBase::CreatePreserveArrayAccessIndex(
- Type *ElTy, Value *Base, unsigned Dimension, unsigned LastIndex,
- MDNode *DbgInfo) {
+Value *IRBuilderBase::CreateVectorInterleave(ArrayRef<Value *> Ops,
+ const Twine &Name) {
+ assert(Ops.size() >= 2 && Ops.size() <= 8 &&
+ "Unexpected number of operands to interleave");
+
+ // Make sure all operands are the same type.
+ assert(isa<VectorType>(Ops[0]->getType()) && "Unexpected type");
+
+#ifndef NDEBUG
+ for (unsigned I = 1; I < Ops.size(); I++) {
+ assert(Ops[I]->getType() == Ops[0]->getType() &&
+ "Vector interleave expects matching operand types!");
+ }
+#endif
+
+ if (auto *V = Folder.FoldVectorInterleave(Ops))
+ return V;
+
+ unsigned IID = Intrinsic::getInterleaveIntrinsicID(Ops.size());
+ auto *SubvecTy = cast<VectorType>(Ops[0]->getType());
+ Type *DestTy = VectorType::get(SubvecTy->getElementType(),
+ SubvecTy->getElementCount() * Ops.size());
+ return CreateIntrinsic(IID, {DestTy}, Ops, {}, Name);
+}
+
+Value *IRBuilderBase::CreatePreserveArrayAccessIndex(Type *ElTy, Value *Base,
+ unsigned Dimension,
+ unsigned LastIndex,
+ MDNode *DbgInfo) {
auto *BaseType = Base->getType();
assert(isa<PointerType>(BaseType) &&
"Invalid Base ptr type for preserve.array.access.index.");
diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp
index 6c35ade3e57c5..58a1f745a7122 100644
--- a/llvm/lib/IR/Intrinsics.cpp
+++ b/llvm/lib/IR/Intrinsics.cpp
@@ -1133,3 +1133,27 @@ std::optional<Function *> Intrinsic::remangleIntrinsicFunction(Function *F) {
"Shouldn't change the signature");
return NewDecl;
}
+
+struct InterleaveIntrinsic {
+ Intrinsic::ID Interleave, Deinterleave;
+};
+
+static InterleaveIntrinsic InterleaveIntrinsics[] = {
+ {Intrinsic::vector_interleave2, Intrinsic::vector_deinterleave2},
+ {Intrinsic::vector_interleave3, Intrinsic::vector_deinterleave3},
+ {Intrinsic::vector_interleave4, Intrinsic::vector_deinterleave4},
+ {Intrinsic::vector_interleave5, Intrinsic::vector_deinterleave5},
+ {Intrinsic::vector_interleave6, Intrinsic::vector_deinterleave6},
+ {Intrinsic::vector_interleave7, Intrinsic::vector_deinterleave7},
+ {Intrinsic::vector_interleave8, Intrinsic::vector_deinterleave8},
+};
+
+Intrinsic::ID Intrinsic::getInterleaveIntrinsicID(unsigned Factor) {
+ assert(Factor >= 2 && Factor <= 8 && "Unexpected factor");
+ return InterleaveIntrinsics[Factor - 2].Interleave;
+}
+
+Intrinsic::ID Intrinsic::getDeinterleaveIntrinsicID(unsigned Factor) {
+ assert(Factor >= 2 && Factor <= 8 && "Unexpected factor");
+ return InterleaveIntrinsics[Factor - 2].Deinterleave;
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 225658b76827e..68e7c20a070f4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3391,12 +3391,7 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
// must use intrinsics to interleave.
if (VecTy->isScalableTy()) {
assert(Factor <= 8 && "Unsupported interleave factor for scalable vectors");
- VectorType *InterleaveTy =
- VectorType::get(VecTy->getElementType(),
- VecTy->getElementCount().multiplyCoefficientBy(Factor));
- return Builder.CreateIntrinsic(InterleaveTy,
- getInterleaveIntrinsicID(Factor), Vals,
- /*FMFSource=*/nullptr, Name);
+ return Builder.CreateVectorInterleave(Vals, Name);
}
// Fixed length. Start by concatenating all vectors into a wide vector.
@@ -3503,8 +3498,8 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
assert(InterleaveFactor <= 8 &&
"Unsupported deinterleave factor for scalable vectors");
NewLoad = State.Builder.CreateIntrinsic(
- getDeinterleaveIntrinsicID(InterleaveFactor), NewLoad->getType(),
- NewLoad,
+ Intrinsic::getDeinterleaveIntrinsicID(InterleaveFactor),
+ NewLoad->getType(), NewLoad,
/*FMFSource=*/nullptr, "strided.vec");
}
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-opt-crash.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-opt-crash.ll
index b62dbc0ee8ea3..b3939268832f7 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-opt-crash.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-opt-crash.ll
@@ -9,10 +9,9 @@ define void @reprocessing_crash() #0 {
; CHECK-LABEL: define void @reprocessing_crash(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[TMP1:%.*]] = phi <vscale x 4 x double> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi <vscale x 4 x double> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2]] = fsub <vscale x 4 x double> [[TMP1]], zeroinitializer
; CHECK-NEXT: br i1 false, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -48,11 +47,10 @@ define double @test_fp_single_reduction(i1 %c) #2 {
; CHECK-LABEL: define double @test_fp_single_reduction(
; CHECK-SAME: i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[TMP0:%.*]] = call <8 x double> @llvm.vector.interleave2.v8f64(<4 x double> zeroinitializer, <4 x double> zeroinitializer)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[VEC_PHI218:%.*]] = phi <4 x double> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = phi <8 x double> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi <8 x double> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x double> zeroinitializer, <8 x double> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[TMP2]] = fadd <4 x double> [[VEC_PHI218]], [[STRIDED_VEC]]
; CHECK-NEXT: [[TMP3]] = fadd <8 x double> [[TMP1]], zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
index 880bd2904154c..d67aa08125f74 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
@@ -14,20 +14,19 @@ target triple = "aarch64"
define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: mov w8, #100 // =0x64
-; CHECK-NEXT: cntd x9
; CHECK-NEXT: whilelo p1.d, xzr, x8
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: rdvl x10, #2
-; CHECK-NEXT: mov x11, x9
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
-; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
+; CHECK-NEXT: mov x11, x9
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
-; CHECK-NEXT: mov z6.d, z1.d
-; CHECK-NEXT: mov z7.d, z0.d
+; CHECK-NEXT: mov z6.d, z0.d
+; CHECK-NEXT: mov z7.d, z1.d
; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
@@ -39,14 +38,14 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: mov z0.d, p2/m, z7.d
-; CHECK-NEXT: mov z1.d, p1/m, z6.d
+; CHECK-NEXT: mov z1.d, p2/m, z7.d
+; CHECK-NEXT: mov z0.d, p1/m, z6.d
; CHECK-NEXT: whilelo p1.d, x11, x8
; CHECK-NEXT: add x11, x11, x9
; CHECK-NEXT: b.mi .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit.block
-; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
-; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
+; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
+; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
; CHECK-NEXT: faddv d0, p0, z2.d
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -111,21 +110,20 @@ exit.block: ; preds = %vector.body
define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %cond) {
; CHECK-LABEL: complex_mul_predicated_v2f64:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: cntd x9
-; CHECK-NEXT: mov w11, #100 // =0x64
; CHECK-NEXT: neg x10, x9
+; CHECK-NEXT: mov w11, #100 // =0x64
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: and x10, x10, x11
; CHECK-NEXT: rdvl x11, #2
-; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
-; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB1_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ld1w { z2.d }, p0/z, [x2, x8, lsl #2]
-; CHECK-NEXT: mov z6.d, z1.d
-; CHECK-NEXT: mov z7.d, z0.d
+; CHECK-NEXT: mov z6.d, z0.d
+; CHECK-NEXT: mov z7.d, z1.d
; CHECK-NEXT: add x8, x8, x9
; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
; CHECK-NEXT: cmp x10, x8
@@ -141,12 +139,12 @@ define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: mov z0.d, p2/m, z7.d
-; CHECK-NEXT: mov z1.d, p1/m, z6.d
+; CHECK-NEXT: mov z1.d, p2/m, z7.d
+; CHECK-NEXT: mov z0.d, p1/m, z6.d
; CHECK-NEXT: b.ne .LBB1_1
; CHECK-NEXT: // %bb.2: // %exit.block
-; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
-; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
+; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
+; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
; CHECK-NEXT: faddv d0, p0, z2.d
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -213,21 +211,20 @@ exit.block: ; preds = %vector.body
define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, ptr %cond) {
; CHECK-LABEL: complex_mul_predicated_x2_v2f64:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: mov w8, #100 // =0x64
-; CHECK-NEXT: cntd x9
; CHECK-NEXT: whilelo p1.d, xzr, x8
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: rdvl x10, #2
-; CHECK-NEXT: cnth x11
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cnth x11
; CHECK-NEXT: mov x12, x9
-; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
-; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB2_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ld1w { z2.d }, p1/z, [x2]
-; CHECK-NEXT: mov z6.d, z1.d
-; CHECK-NEXT: mov z7.d, z0.d
+; CHECK-NEXT: mov z6.d, z0.d
+; CHECK-NEXT: mov z7.d, z1.d
; CHECK-NEXT: add x2, x2, x11
; CHECK-NEXT: and z2.d, z2.d, #0xffffffff
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
@@ -243,14 +240,14 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: mov z0.d, p2/m, z7.d
-; CHECK-NEXT: mov z1.d, p1/m, z6.d
+; CHECK-NEXT: mov z1.d, p2/m, z7.d
+; CHECK-NEXT: mov z0.d, p1/m, z6.d
; CHECK-NEXT: whilelo p1.d, x12, x8
; CHECK-NEXT: add x12, x12, x9
; CHECK-NEXT: b.mi .LBB2_1
; CHECK-NEXT: // %bb.2: // %exit.block
-; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
-; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
+; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
+; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
; CHECK-NEXT: faddv d0, p0, z2.d
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
index 29be231920305..0646ca4948e1d 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
@@ -14,15 +14,14 @@ target triple = "aarch64"
define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: cntd x8
-; CHECK-NEXT: mov w10, #100 // =0x64
; CHECK-NEXT: neg x9, x8
+; CHECK-NEXT: mov w10, #100 // =0x64
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and x9, x9, x10
; CHECK-NEXT: rdvl x10, #2
-; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
-; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr z2, [x0, #1, mul vl]
@@ -32,14 +31,14 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: ldr z5, [x1]
; CHECK-NEXT: add x1, x1, x10
; CHECK-NEXT: add x0, x0, x10
-; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #0
-; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #0
-; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #90
+; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
+; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
+; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #90
+; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit.block
-; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
-; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
+; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
+; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
; CHECK-NEXT: faddv d0, p0, z2.d
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -183,17 +182,16 @@ exit.block: ; preds = %vector.body
define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64_unrolled:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: cntw x8
-; CHECK-NEXT: mov w10, #1000 // =0x3e8
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: neg x9, x8
+; CHECK-NEXT: mov w10, #1000 // =0x3e8
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and x9, x9, x10
; CHECK-NEXT: rdvl x10, #4
-; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
-; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
-; CHECK-NEXT: mov z2.d, z1.d
-; CHECK-NEXT: mov z3.d, z0.d
; CHECK-NEXT: .LBB2_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr z4, [x0, #1, mul vl]
@@ -207,20 +205,20 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK-NEXT: ldr z18, [x1, #3, mul vl]
; CHECK-NEXT: ldr z19, [x1, #2, mul vl]
; CHECK-NEXT: add x1, x1, x10
-; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z5.d, #0
-; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #0
+; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #0
+; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #0
; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #0
; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #0
-; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z5.d, #90
-; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #90
+; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #90
+; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #90
; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #90
; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #90
; CHECK-NEXT: b.ne .LBB2_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z4.d, z2.d, z3.d
-; CHECK-NEXT: uzp1 z5.d, z1.d, z0.d
+; CHECK-NEXT: uzp1 z5.d, z0.d, z1.d
; CHECK-NEXT: uzp2 z2.d, z2.d, z3.d
-; CHECK-NEXT: uzp2 z0.d, z1.d, z0.d
+; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT: fadd z1.d, z4.d, z5.d
; CHECK-NEXT: fadd z2.d, z2.d, z0.d
; CHECK-NEXT: faddv d0, p0, z1.d
@@ -310,15 +308,15 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
; CHECK-LABEL: reduction_mix:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: cntd x9
-; CHECK-NEXT: mov w11, #100 // =0x64
+; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: neg x10, x9
+; CHECK-NEXT: mov w11, #100 // =0x64
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: and x10, x10, x11
; CHECK-NEXT: rdvl x11, #2
-; CHECK-NEXT: zip2 z0.d, z2.d, z2.d
-; CHECK-NEXT: zip1 z1.d, z2.d, z2.d
; CHECK-NEXT: .LBB3_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr z3, [x0]
@@ -327,13 +325,13 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
; CHECK-NEXT: ld1w { z5.d }, p0/z, [x3, x8, lsl #2]
; CHECK-NEXT: add x8, x8, x9
; CHECK-NEXT: cmp x10, x8
-; CHECK-NEXT: fadd z0.d, z4.d, z0.d
-; CHECK-NEXT: fadd z1.d, z3.d, z1.d
+; CHECK-NEXT: fadd z1.d, z4.d, z1.d
+; CHECK-NEXT: fadd z0.d, z3.d, z0.d
; CHECK-NEXT: add z2.d, z5.d, z2.d
; CHECK-NEXT: b.ne .LBB3_1
; CHECK-NEXT: // %bb.2: // %middle.block
-; CHECK-NEXT: uzp2 z3.d, z1.d, z0.d
-; CHECK-NEXT: uzp1 z1.d, z1.d, z0.d
+; CHECK-NEXT: uzp2 z3.d, z0.d, z1.d
+; CHECK-NEXT: uzp1 z1.d, z0.d, z1.d
; CHECK-NEXT: uaddv d2, p0, z2.d
; CHECK-NEXT: faddv d0, p0, z3.d
; CHECK-NEXT: faddv d1, p0, z1.d
From b2091fb6344cefa26180449bd6702c3ab7410c90 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Mon, 28 Jul 2025 15:15:30 +0000
Subject: [PATCH 2/2] Remove constant folds
---
.../llvm/Analysis/InstSimplifyFolder.h | 4 --
llvm/include/llvm/Analysis/TargetFolder.h | 18 ------
llvm/include/llvm/IR/ConstantFolder.h | 18 ------
llvm/include/llvm/IR/IRBuilderFolder.h | 2 -
llvm/include/llvm/IR/NoFolder.h | 4 --
llvm/lib/Analysis/ConstantFolding.cpp | 12 ----
llvm/lib/IR/IRBuilder.cpp | 4 --
.../complex-deinterleaving-opt-crash.ll | 6 +-
...rleaving-reductions-predicated-scalable.ll | 55 ++++++++++---------
...plex-deinterleaving-reductions-scalable.ll | 52 +++++++++---------
10 files changed, 60 insertions(+), 115 deletions(-)
diff --git a/llvm/include/llvm/Analysis/InstSimplifyFolder.h b/llvm/include/llvm/Analysis/InstSimplifyFolder.h
index b7e3decb91d61..58793ed977f68 100644
--- a/llvm/include/llvm/Analysis/InstSimplifyFolder.h
+++ b/llvm/include/llvm/Analysis/InstSimplifyFolder.h
@@ -125,10 +125,6 @@ class LLVM_ABI InstSimplifyFolder final : public IRBuilderFolder {
dyn_cast_if_present<CallBase>(FMFSource));
}
- Value *FoldVectorInterleave(ArrayRef<Value *> Ops) const override {
- return ConstFolder.FoldVectorInterleave(Ops);
- }
-
//===--------------------------------------------------------------------===//
// Cast/Conversion Operators
//===--------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Analysis/TargetFolder.h b/llvm/include/llvm/Analysis/TargetFolder.h
index 151ff93975ae6..d27455cf3505d 100644
--- a/llvm/include/llvm/Analysis/TargetFolder.h
+++ b/llvm/include/llvm/Analysis/TargetFolder.h
@@ -189,24 +189,6 @@ class LLVM_ABI TargetFolder final : public IRBuilderFolder {
return nullptr;
}
- Value *FoldVectorInterleave(ArrayRef<Value *> Ops) const override {
- // Check to see if all operands are the same.
- for (unsigned I = 1; I < Ops.size(); I++) {
- if (Ops[I] != Ops[0])
- return nullptr;
- }
-
- // Is this just a large splat?
- if (auto *C = dyn_cast<Constant>(Ops[0])) {
- if (auto *V = C->getSplatValue()) {
- auto *SubvecTy = cast<VectorType>(Ops[0]->getType());
- return ConstantVector::getSplat(
- SubvecTy->getElementCount() * Ops.size(), V);
- }
- }
- return nullptr;
- }
-
Value *FoldBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Type *Ty,
Instruction *FMFSource) const override {
auto *C1 = dyn_cast<Constant>(LHS);
diff --git a/llvm/include/llvm/IR/ConstantFolder.h b/llvm/include/llvm/IR/ConstantFolder.h
index 578f44aabf0e9..26b7242abc4d9 100644
--- a/llvm/include/llvm/IR/ConstantFolder.h
+++ b/llvm/include/llvm/IR/ConstantFolder.h
@@ -181,24 +181,6 @@ class LLVM_ABI ConstantFolder final : public IRBuilderFolder {
return nullptr;
}
- Value *FoldVectorInterleave(ArrayRef<Value *> Ops) const override {
- // Check to see if all operands are the same.
- for (unsigned I = 1; I < Ops.size(); I++) {
- if (Ops[I] != Ops[0])
- return nullptr;
- }
-
- // Is this just a large splat?
- if (auto *C = dyn_cast<Constant>(Ops[0])) {
- if (auto *V = C->getSplatValue()) {
- auto *SubvecTy = cast<VectorType>(Ops[0]->getType());
- return ConstantVector::getSplat(
- SubvecTy->getElementCount() * Ops.size(), V);
- }
- }
- return nullptr;
- }
-
Value *FoldBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Type *Ty,
Instruction *FMFSource) const override {
// Use TargetFolder or InstSimplifyFolder instead.
diff --git a/llvm/include/llvm/IR/IRBuilderFolder.h b/llvm/include/llvm/IR/IRBuilderFolder.h
index 24416712c3d82..db4ab5af2433a 100644
--- a/llvm/include/llvm/IR/IRBuilderFolder.h
+++ b/llvm/include/llvm/IR/IRBuilderFolder.h
@@ -75,8 +75,6 @@ class LLVM_ABI IRBuilderFolder {
virtual Value *FoldCast(Instruction::CastOps Op, Value *V,
Type *DestTy) const = 0;
- virtual Value *FoldVectorInterleave(ArrayRef<Value *> Ops) const = 0;
-
virtual Value *
FoldBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Type *Ty,
Instruction *FMFSource = nullptr) const = 0;
diff --git a/llvm/include/llvm/IR/NoFolder.h b/llvm/include/llvm/IR/NoFolder.h
index cb843c8f7a19e..9f16c6983313a 100644
--- a/llvm/include/llvm/IR/NoFolder.h
+++ b/llvm/include/llvm/IR/NoFolder.h
@@ -118,10 +118,6 @@ class LLVM_ABI NoFolder final : public IRBuilderFolder {
return nullptr;
}
- Value *FoldVectorInterleave(ArrayRef<Value *> Ops) const override {
- return nullptr;
- }
-
//===--------------------------------------------------------------------===//
// Cast/Conversion Operators
//===--------------------------------------------------------------------===//
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index ea4c49db92818..759c553111d06 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1641,19 +1641,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::vector_extract:
case Intrinsic::vector_insert:
case Intrinsic::vector_interleave2:
- case Intrinsic::vector_interleave3:
- case Intrinsic::vector_interleave4:
- case Intrinsic::vector_interleave5:
- case Intrinsic::vector_interleave6:
- case Intrinsic::vector_interleave7:
- case Intrinsic::vector_interleave8:
case Intrinsic::vector_deinterleave2:
- case Intrinsic::vector_deinterleave3:
- case Intrinsic::vector_deinterleave4:
- case Intrinsic::vector_deinterleave5:
- case Intrinsic::vector_deinterleave6:
- case Intrinsic::vector_deinterleave7:
- case Intrinsic::vector_deinterleave8:
// Target intrinsics
case Intrinsic::amdgcn_perm:
case Intrinsic::amdgcn_wave_reduce_umin:
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index 35f47f23196fa..49c6dc7f401ad 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -13,7 +13,6 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -1160,9 +1159,6 @@ Value *IRBuilderBase::CreateVectorInterleave(ArrayRef<Value *> Ops,
}
#endif
- if (auto *V = Folder.FoldVectorInterleave(Ops))
- return V;
-
unsigned IID = Intrinsic::getInterleaveIntrinsicID(Ops.size());
auto *SubvecTy = cast<VectorType>(Ops[0]->getType());
Type *DestTy = VectorType::get(SubvecTy->getElementType(),
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-opt-crash.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-opt-crash.ll
index b3939268832f7..b62dbc0ee8ea3 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-opt-crash.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-opt-crash.ll
@@ -9,9 +9,10 @@ define void @reprocessing_crash() #0 {
; CHECK-LABEL: define void @reprocessing_crash(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[TMP1:%.*]] = phi <vscale x 4 x double> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi <vscale x 4 x double> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2]] = fsub <vscale x 4 x double> [[TMP1]], zeroinitializer
; CHECK-NEXT: br i1 false, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -47,10 +48,11 @@ define double @test_fp_single_reduction(i1 %c) #2 {
; CHECK-LABEL: define double @test_fp_single_reduction(
; CHECK-SAME: i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call <8 x double> @llvm.vector.interleave2.v8f64(<4 x double> zeroinitializer, <4 x double> zeroinitializer)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[VEC_PHI218:%.*]] = phi <4 x double> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = phi <8 x double> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi <8 x double> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x double> zeroinitializer, <8 x double> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[TMP2]] = fadd <4 x double> [[VEC_PHI218]], [[STRIDED_VEC]]
; CHECK-NEXT: [[TMP3]] = fadd <8 x double> [[TMP1]], zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
index d67aa08125f74..880bd2904154c 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
@@ -14,19 +14,20 @@ target triple = "aarch64"
define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: mov w8, #100 // =0x64
-; CHECK-NEXT: whilelo p1.d, xzr, x8
; CHECK-NEXT: cntd x9
+; CHECK-NEXT: whilelo p1.d, xzr, x8
; CHECK-NEXT: rdvl x10, #2
-; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x11, x9
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
+; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
-; CHECK-NEXT: mov z6.d, z0.d
-; CHECK-NEXT: mov z7.d, z1.d
+; CHECK-NEXT: mov z6.d, z1.d
+; CHECK-NEXT: mov z7.d, z0.d
; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
@@ -38,14 +39,14 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: mov z1.d, p2/m, z7.d
-; CHECK-NEXT: mov z0.d, p1/m, z6.d
+; CHECK-NEXT: mov z0.d, p2/m, z7.d
+; CHECK-NEXT: mov z1.d, p1/m, z6.d
; CHECK-NEXT: whilelo p1.d, x11, x8
; CHECK-NEXT: add x11, x11, x9
; CHECK-NEXT: b.mi .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit.block
-; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
-; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
+; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
+; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
; CHECK-NEXT: faddv d0, p0, z2.d
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -110,20 +111,21 @@ exit.block: ; preds = %vector.body
define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %cond) {
; CHECK-LABEL: complex_mul_predicated_v2f64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: cntd x9
-; CHECK-NEXT: neg x10, x9
; CHECK-NEXT: mov w11, #100 // =0x64
+; CHECK-NEXT: neg x10, x9
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: and x10, x10, x11
; CHECK-NEXT: rdvl x11, #2
+; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
+; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB1_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ld1w { z2.d }, p0/z, [x2, x8, lsl #2]
-; CHECK-NEXT: mov z6.d, z0.d
-; CHECK-NEXT: mov z7.d, z1.d
+; CHECK-NEXT: mov z6.d, z1.d
+; CHECK-NEXT: mov z7.d, z0.d
; CHECK-NEXT: add x8, x8, x9
; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
; CHECK-NEXT: cmp x10, x8
@@ -139,12 +141,12 @@ define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: mov z1.d, p2/m, z7.d
-; CHECK-NEXT: mov z0.d, p1/m, z6.d
+; CHECK-NEXT: mov z0.d, p2/m, z7.d
+; CHECK-NEXT: mov z1.d, p1/m, z6.d
; CHECK-NEXT: b.ne .LBB1_1
; CHECK-NEXT: // %bb.2: // %exit.block
-; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
-; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
+; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
+; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
; CHECK-NEXT: faddv d0, p0, z2.d
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -211,20 +213,21 @@ exit.block: ; preds = %vector.body
define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, ptr %cond) {
; CHECK-LABEL: complex_mul_predicated_x2_v2f64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: mov w8, #100 // =0x64
-; CHECK-NEXT: whilelo p1.d, xzr, x8
; CHECK-NEXT: cntd x9
+; CHECK-NEXT: whilelo p1.d, xzr, x8
; CHECK-NEXT: rdvl x10, #2
-; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cnth x11
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x12, x9
+; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
+; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB2_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ld1w { z2.d }, p1/z, [x2]
-; CHECK-NEXT: mov z6.d, z0.d
-; CHECK-NEXT: mov z7.d, z1.d
+; CHECK-NEXT: mov z6.d, z1.d
+; CHECK-NEXT: mov z7.d, z0.d
; CHECK-NEXT: add x2, x2, x11
; CHECK-NEXT: and z2.d, z2.d, #0xffffffff
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
@@ -240,14 +243,14 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: mov z1.d, p2/m, z7.d
-; CHECK-NEXT: mov z0.d, p1/m, z6.d
+; CHECK-NEXT: mov z0.d, p2/m, z7.d
+; CHECK-NEXT: mov z1.d, p1/m, z6.d
; CHECK-NEXT: whilelo p1.d, x12, x8
; CHECK-NEXT: add x12, x12, x9
; CHECK-NEXT: b.mi .LBB2_1
; CHECK-NEXT: // %bb.2: // %exit.block
-; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
-; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
+; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
+; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
; CHECK-NEXT: faddv d0, p0, z2.d
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
index 0646ca4948e1d..29be231920305 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
@@ -14,14 +14,15 @@ target triple = "aarch64"
define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: cntd x8
-; CHECK-NEXT: neg x9, x8
; CHECK-NEXT: mov w10, #100 // =0x64
+; CHECK-NEXT: neg x9, x8
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and x9, x9, x10
; CHECK-NEXT: rdvl x10, #2
+; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
+; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr z2, [x0, #1, mul vl]
@@ -31,14 +32,14 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: ldr z5, [x1]
; CHECK-NEXT: add x1, x1, x10
; CHECK-NEXT: add x0, x0, x10
-; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
-; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
-; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #90
+; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #0
+; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #0
+; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #90
+; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit.block
-; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
-; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
+; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
+; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
; CHECK-NEXT: faddv d0, p0, z2.d
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -182,16 +183,17 @@ exit.block: ; preds = %vector.body
define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64_unrolled:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: cntw x8
-; CHECK-NEXT: movi v2.2d, #0000000000000000
-; CHECK-NEXT: movi v3.2d, #0000000000000000
-; CHECK-NEXT: neg x9, x8
; CHECK-NEXT: mov w10, #1000 // =0x3e8
+; CHECK-NEXT: neg x9, x8
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and x9, x9, x10
; CHECK-NEXT: rdvl x10, #4
+; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
+; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
+; CHECK-NEXT: mov z2.d, z1.d
+; CHECK-NEXT: mov z3.d, z0.d
; CHECK-NEXT: .LBB2_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr z4, [x0, #1, mul vl]
@@ -205,20 +207,20 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK-NEXT: ldr z18, [x1, #3, mul vl]
; CHECK-NEXT: ldr z19, [x1, #2, mul vl]
; CHECK-NEXT: add x1, x1, x10
-; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #0
-; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #0
+; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z5.d, #0
+; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #0
; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #0
; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #0
-; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #90
-; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #90
+; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z5.d, #90
+; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #90
; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #90
; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #90
; CHECK-NEXT: b.ne .LBB2_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z4.d, z2.d, z3.d
-; CHECK-NEXT: uzp1 z5.d, z0.d, z1.d
+; CHECK-NEXT: uzp1 z5.d, z1.d, z0.d
; CHECK-NEXT: uzp2 z2.d, z2.d, z3.d
-; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d
+; CHECK-NEXT: uzp2 z0.d, z1.d, z0.d
; CHECK-NEXT: fadd z1.d, z4.d, z5.d
; CHECK-NEXT: fadd z2.d, z2.d, z0.d
; CHECK-NEXT: faddv d0, p0, z1.d
@@ -308,15 +310,15 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
; CHECK-LABEL: reduction_mix:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v2.2d, #0000000000000000
-; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: cntd x9
-; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: neg x10, x9
; CHECK-NEXT: mov w11, #100 // =0x64
+; CHECK-NEXT: neg x10, x9
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: and x10, x10, x11
; CHECK-NEXT: rdvl x11, #2
+; CHECK-NEXT: zip2 z0.d, z2.d, z2.d
+; CHECK-NEXT: zip1 z1.d, z2.d, z2.d
; CHECK-NEXT: .LBB3_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr z3, [x0]
@@ -325,13 +327,13 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
; CHECK-NEXT: ld1w { z5.d }, p0/z, [x3, x8, lsl #2]
; CHECK-NEXT: add x8, x8, x9
; CHECK-NEXT: cmp x10, x8
-; CHECK-NEXT: fadd z1.d, z4.d, z1.d
-; CHECK-NEXT: fadd z0.d, z3.d, z0.d
+; CHECK-NEXT: fadd z0.d, z4.d, z0.d
+; CHECK-NEXT: fadd z1.d, z3.d, z1.d
; CHECK-NEXT: add z2.d, z5.d, z2.d
; CHECK-NEXT: b.ne .LBB3_1
; CHECK-NEXT: // %bb.2: // %middle.block
-; CHECK-NEXT: uzp2 z3.d, z0.d, z1.d
-; CHECK-NEXT: uzp1 z1.d, z0.d, z1.d
+; CHECK-NEXT: uzp2 z3.d, z1.d, z0.d
+; CHECK-NEXT: uzp1 z1.d, z1.d, z0.d
; CHECK-NEXT: uaddv d2, p0, z2.d
; CHECK-NEXT: faddv d0, p0, z3.d
; CHECK-NEXT: faddv d1, p0, z1.d