[llvm] [VectorCombine] Add free concats to shuffleToIdentity. (PR #94954)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 10 03:40:33 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
This is another relatively small adjustment to shuffleToIdentity, which has had a few knock-on effects that required a few more changes. It attempts to detect free concats, which will be legalized to multiple vector operations. For example, the lanes may be '[a[0], a[1], b[0], b[1]]' where a and b are v2f64 under AArch64.
In order to do this:
- isFreeConcat detects whether the input has piece-wise identities from multiple inputs that can become a concat.
- A tree of concat shuffles is created to concatenate the input values into a single vector. This is a little different to most other inputs, as they are created from multiple values that are being combined together, and we cannot rely on the Lane0 insert location always being valid.
- The insert location is changed to the original location instead of updating per item, which ensures it is valid due to the order in which we visit and create items.
- As with splats/identities, an input value could be both a concat _and_ a splat. To make the creation simpler, the Items have been changed to hold the Use, not the Value, which helps identify values more reliably without needing to recompute whether they are identities/splats/concats.
---
Patch is 43.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/94954.diff
4 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+129-72)
- (modified) llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll (+7-7)
- (modified) llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll (+33-138)
- (modified) llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll (+20-20)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index e608c7fb60468..0d36e29db7b43 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1669,20 +1669,20 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
return true;
}
-using InstLane = std::pair<Value *, int>;
+using InstLane = std::pair<Use *, int>;
-static InstLane lookThroughShuffles(Value *V, int Lane) {
- while (auto *SV = dyn_cast<ShuffleVectorInst>(V)) {
+static InstLane lookThroughShuffles(Use *V, int Lane) {
+ while (auto *SV = dyn_cast<ShuffleVectorInst>(V->get())) {
unsigned NumElts =
cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
int M = SV->getMaskValue(Lane);
if (M < 0)
return {nullptr, PoisonMaskElem};
if (static_cast<unsigned>(M) < NumElts) {
- V = SV->getOperand(0);
+ V = &SV->getOperandUse(0);
Lane = M;
} else {
- V = SV->getOperand(1);
+ V = &SV->getOperandUse(1);
Lane = M - NumElts;
}
}
@@ -1695,37 +1695,83 @@ generateInstLaneVectorFromOperand(ArrayRef<InstLane> Item, int Op) {
for (InstLane IL : Item) {
auto [V, Lane] = IL;
InstLane OpLane =
- V ? lookThroughShuffles(cast<Instruction>(V)->getOperand(Op), Lane)
+ V ? lookThroughShuffles(&cast<Instruction>(V->get())->getOperandUse(Op),
+ Lane)
: InstLane{nullptr, PoisonMaskElem};
NItem.emplace_back(OpLane);
}
return NItem;
}
+/// Detect concat of multiple values into a vector
+static bool isFreeConcat(ArrayRef<InstLane> Item,
+ const TargetTransformInfo &TTI) {
+ auto *Ty = cast<FixedVectorType>(Item.front().first->get()->getType());
+ unsigned NumElts = Ty->getNumElements();
+ if (Item.size() == NumElts || NumElts == 1 || Item.size() % NumElts != 0)
+ return false;
+
+ // Check that the concat is free, usually meaning that the type will be split
+ // during legalization.
+ SmallVector<int, 16> ConcatMask(Ty->getNumElements() * 2);
+ std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
+ if (TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, Ty, ConcatMask,
+ TTI::TCK_RecipThroughput) != 0)
+ return false;
+
+ unsigned NumSlices = Item.size() / NumElts;
+ // Currently we generate a tree of shuffles for the concats, which limits us
+ // to a power2.
+ if (!isPowerOf2_32(NumSlices))
+ return false;
+ for (unsigned Slice = 0; Slice < NumSlices; ++Slice) {
+ Use *SliceV = Item[Slice * NumElts].first;
+ if (!SliceV || SliceV->get()->getType() != Ty)
+ return false;
+ for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
+ auto [V, Lane] = Item[Slice * NumElts + Elt];
+ if (Lane != static_cast<int>(Elt) || SliceV->get() != V->get())
+ return false;
+ }
+ }
+ return true;
+}
+
static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
- const SmallPtrSet<Value *, 4> &IdentityLeafs,
- const SmallPtrSet<Value *, 4> &SplatLeafs,
+ const SmallPtrSet<Use *, 4> &IdentityLeafs,
+ const SmallPtrSet<Use *, 4> &SplatLeafs,
+ const SmallPtrSet<Use *, 4> &ConcatLeafs,
IRBuilder<> &Builder) {
auto [FrontV, FrontLane] = Item.front();
- if (IdentityLeafs.contains(FrontV) &&
- all_of(drop_begin(enumerate(Item)), [Item](const auto &E) {
- Value *FrontV = Item.front().first;
- auto [V, Lane] = E.value();
- return !V || (V == FrontV && Lane == (int)E.index());
- })) {
- return FrontV;
+ if (IdentityLeafs.contains(FrontV)) {
+ return FrontV->get();
}
if (SplatLeafs.contains(FrontV)) {
- if (auto *ILI = dyn_cast<Instruction>(FrontV))
- Builder.SetInsertPoint(*ILI->getInsertionPointAfterDef());
- else if (auto *Arg = dyn_cast<Argument>(FrontV))
- Builder.SetInsertPointPastAllocas(Arg->getParent());
SmallVector<int, 16> Mask(Ty->getNumElements(), FrontLane);
- return Builder.CreateShuffleVector(FrontV, Mask);
+ return Builder.CreateShuffleVector(FrontV->get(), Mask);
+ }
+ if (ConcatLeafs.contains(FrontV)) {
+ unsigned NumElts =
+ cast<FixedVectorType>(FrontV->get()->getType())->getNumElements();
+ SmallVector<Value *> Values(Item.size() / NumElts, nullptr);
+ for (unsigned S = 0; S < Values.size(); ++S)
+ Values[S] = Item[S * NumElts].first->get();
+
+ while (Values.size() > 1) {
+ NumElts *= 2;
+ SmallVector<int, 16> Mask(NumElts, 0);
+ std::iota(Mask.begin(), Mask.end(), 0);
+ SmallVector<Value *> NewValues(Values.size() / 2, nullptr);
+ for (unsigned S = 0; S < NewValues.size(); ++S)
+ NewValues[S] =
+ Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
+ Values = NewValues;
+ }
+ return Values[0];
}
- auto *I = cast<Instruction>(FrontV);
+ auto *I = cast<Instruction>(FrontV->get());
auto *II = dyn_cast<IntrinsicInst>(I);
unsigned NumOps = I->getNumOperands() - (II ? 1 : 0);
SmallVector<Value *> Ops(NumOps);
@@ -1734,16 +1780,16 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
Ops[Idx] = II->getOperand(Idx);
continue;
}
- Ops[Idx] = generateNewInstTree(generateInstLaneVectorFromOperand(Item, Idx),
- Ty, IdentityLeafs, SplatLeafs, Builder);
+ Ops[Idx] =
+ generateNewInstTree(generateInstLaneVectorFromOperand(Item, Idx), Ty,
+ IdentityLeafs, SplatLeafs, ConcatLeafs, Builder);
}
SmallVector<Value *, 8> ValueList;
for (const auto &Lane : Item)
if (Lane.first)
- ValueList.push_back(Lane.first);
+ ValueList.push_back(Lane.first->get());
- Builder.SetInsertPoint(I);
Type *DstTy =
FixedVectorType::get(I->getType()->getScalarType(), Ty->getNumElements());
if (auto *BI = dyn_cast<BinaryOperator>(I)) {
@@ -1785,16 +1831,16 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
// do so.
bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
auto *Ty = dyn_cast<FixedVectorType>(I.getType());
- if (!Ty)
+ if (!Ty || I.use_empty())
return false;
SmallVector<InstLane> Start(Ty->getNumElements());
for (unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
- Start[M] = lookThroughShuffles(&I, M);
+ Start[M] = lookThroughShuffles(&*I.use_begin(), M);
SmallVector<SmallVector<InstLane>> Worklist;
Worklist.push_back(Start);
- SmallPtrSet<Value *, 4> IdentityLeafs, SplatLeafs;
+ SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;
unsigned NumVisited = 0;
while (!Worklist.empty()) {
@@ -1809,12 +1855,12 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
return false;
// Look for an identity value.
- if (!FrontLane &&
- cast<FixedVectorType>(FrontV->getType())->getNumElements() ==
+ if (FrontLane == 0 &&
+ cast<FixedVectorType>(FrontV->get()->getType())->getNumElements() ==
Ty->getNumElements() &&
all_of(drop_begin(enumerate(Item)), [Item](const auto &E) {
- Value *FrontV = Item.front().first;
- return !E.value().first || (E.value().first == FrontV &&
+ Value *FrontV = Item.front().first->get();
+ return !E.value().first || (E.value().first->get() == FrontV &&
E.value().second == (int)E.index());
})) {
IdentityLeafs.insert(FrontV);
@@ -1824,9 +1870,9 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
if (auto *C = dyn_cast<Constant>(FrontV);
C && C->getSplatValue() &&
all_of(drop_begin(Item), [Item](InstLane &IL) {
- Value *FrontV = Item.front().first;
- Value *V = IL.first;
- return !V || V == FrontV;
+ Value *FrontV = Item.front().first->get();
+ Use *V = IL.first;
+ return !V || V->get() == FrontV;
})) {
SplatLeafs.insert(FrontV);
continue;
@@ -1835,7 +1881,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
if (all_of(drop_begin(Item), [Item](InstLane &IL) {
auto [FrontV, FrontLane] = Item.front();
auto [V, Lane] = IL;
- return !V || (V == FrontV && Lane == FrontLane);
+ return !V || (V->get() == FrontV->get() && Lane == FrontLane);
})) {
SplatLeafs.insert(FrontV);
continue;
@@ -1843,11 +1889,11 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
// We need each element to be the same type of value, and check that each
// element has a single use.
- if (!all_of(drop_begin(Item), [Item](InstLane IL) {
- Value *FrontV = Item.front().first;
- Value *V = IL.first;
- if (!V)
+ if (all_of(drop_begin(Item), [Item](InstLane IL) {
+ Value *FrontV = Item.front().first->get();
+ if (!IL.first)
return true;
+ Value *V = IL.first->get();
if (auto *I = dyn_cast<Instruction>(V); I && !I->hasOneUse())
return false;
if (V->getValueID() != FrontV->getValueID())
@@ -1864,40 +1910,49 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
return !II || (isa<IntrinsicInst>(FrontV) &&
II->getIntrinsicID() ==
cast<IntrinsicInst>(FrontV)->getIntrinsicID());
- }))
- return false;
-
- // Check the operator is one that we support. We exclude div/rem in case
- // they hit UB from poison lanes.
- if ((isa<BinaryOperator>(FrontV) &&
- !cast<BinaryOperator>(FrontV)->isIntDivRem()) ||
- isa<CmpInst>(FrontV)) {
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
- } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst>(FrontV)) {
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
- } else if (isa<SelectInst>(FrontV)) {
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, 2));
- } else if (auto *II = dyn_cast<IntrinsicInst>(FrontV);
- II && isTriviallyVectorizable(II->getIntrinsicID())) {
- for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
- if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Op)) {
- if (!all_of(drop_begin(Item), [Item, Op](InstLane &IL) {
- Value *FrontV = Item.front().first;
- Value *V = IL.first;
- return !V || (cast<Instruction>(V)->getOperand(Op) ==
- cast<Instruction>(FrontV)->getOperand(Op));
- }))
- return false;
- continue;
+ })) {
+ // Check the operator is one that we support.
+ if (isa<BinaryOperator, CmpInst>(FrontV)) {
+ // We exclude div/rem in case they hit UB from poison lanes.
+ if (auto *BO = dyn_cast<BinaryOperator>(FrontV);
+ BO && BO->isIntDivRem())
+ return false;
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
+ continue;
+ } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst>(FrontV)) {
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
+ continue;
+ } else if (isa<SelectInst>(FrontV)) {
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, 2));
+ continue;
+ } else if (auto *II = dyn_cast<IntrinsicInst>(FrontV);
+ II && isTriviallyVectorizable(II->getIntrinsicID())) {
+ for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
+ if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Op)) {
+ if (!all_of(drop_begin(Item), [Item, Op](InstLane &IL) {
+ Value *FrontV = Item.front().first->get();
+ Value *V = IL.first->get();
+ return !V || (cast<Instruction>(V)->getOperand(Op) ==
+ cast<Instruction>(FrontV)->getOperand(Op));
+ }))
+ return false;
+ continue;
+ }
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, Op));
}
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, Op));
+ continue;
}
- } else {
- return false;
}
+
+ if (isFreeConcat(Item, TTI)) {
+ ConcatLeafs.insert(FrontV);
+ continue;
+ }
+
+ return false;
}
if (NumVisited <= 1)
@@ -1905,7 +1960,9 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
// If we got this far, we know the shuffles are superfluous and can be
// removed. Scan through again and generate the new tree of instructions.
- Value *V = generateNewInstTree(Start, Ty, IdentityLeafs, SplatLeafs, Builder);
+ Builder.SetInsertPoint(&I);
+ Value *V = generateNewInstTree(Start, Ty, IdentityLeafs, SplatLeafs,
+ ConcatLeafs, Builder);
replaceValue(I, *V);
return true;
}
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll
index c085e10c049a9..3ee8ba5d09ed1 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll
@@ -22,9 +22,9 @@ define void @add4(ptr noalias noundef %x, ptr noalias noundef %y, i32 noundef %n
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i16>, ptr [[TMP0]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_VEC24:%.*]] = load <32 x i16>, ptr [[TMP1]], align 2
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]]
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP2]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]]
; CHECK-NEXT: store <32 x i16> [[INTERLEAVED_VEC]], ptr [[GEP]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -403,12 +403,12 @@ define void @addmul(ptr noalias noundef %x, ptr noundef %y, ptr noundef %z, i32
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i16>, ptr [[TMP0]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[Z:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_VEC31:%.*]] = load <32 x i16>, ptr [[TMP1]], align 2
-; CHECK-NEXT: [[TMP2:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[WIDE_VEC36:%.*]] = load <32 x i16>, ptr [[TMP3]], align 2
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[TMP2]], [[WIDE_VEC36]]
-; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC36:%.*]] = load <32 x i16>, ptr [[TMP2]], align 2
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[TMP4]], [[WIDE_VEC36]]
; CHECK-NEXT: store <32 x i16> [[INTERLEAVED_VEC]], ptr [[GEP]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll
index d72532963d12a..7aba1bbb1c9a0 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll
@@ -82,15 +82,9 @@ define <8 x i8> @concata_addmul_small(<4 x i8> %a1, <4 x i8> %a2, <8 x i8> %b, <
define <8 x i32> @concata_addmul_big(<4 x i32> %a1, <4 x i32> %a2, <8 x i32> %b, <8 x i32> %c) {
; CHECK-LABEL: @concata_addmul_big(
-; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[CB:%.*]] = shufflevector <8 x i32> [[C:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[CT:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[XB:%.*]] = mul <4 x i32> [[A1:%.*]], [[BB]]
-; CHECK-NEXT: [[XT:%.*]] = mul <4 x i32> [[A2:%.*]], [[BT]]
-; CHECK-NEXT: [[YB:%.*]] = add <4 x i32> [[XB]], [[CB]]
-; CHECK-NEXT: [[YT:%.*]] = add <4 x i32> [[XT]], [[CT]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[YB]], <4 x i32> [[YT]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> [[A2:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: [[R:%.*]] = add <8 x i32> [[TMP2]], [[C:%.*]]
; CHECK-NEXT: ret <8 x i32> [[R]]
;
%bb = shufflevector <8 x i32> %b, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -107,29 +101,11 @@ define <8 x i32> @concata_addmul_big(<4 x i32> %a1, <4 x i32> %a2, <8 x i32> %b,
define <16 x i32> @concata_addmul_bigger(<4 x i32> %a1a, <4 x i32> %a2a, <4 x i32> %a3a, <4 x i32> %a4a, <16 x i32> %b, <16 x i32> %c) {
; CHECK-LABEL: @concata_addmul_bigger(
-; CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i32> [[A1A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A3:%.*]] = shufflevector <4 x i32> [[A3A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A4:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[B1:%.*]] = shufflevector <16 x i32> [[B:%.*]], <16 x i32> poison, <4 x i32> <i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT: [[B2:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT: [[B3:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[B4:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[C1:%.*]] = shufflevector <16 x i32> [[C:%.*]], <16 x i32> poison, <4 x i32> <i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT: [[C2:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT: [[C3:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[C4:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[X1:%.*]] = mul <4 x i32> [[A1]], [[B1]]
-; CHECK-NEXT: [[X2:%.*]] = mul <4 x i32> [[A2]], [[B2]]
-; CHECK-NEXT: [[X3:%.*]] = mul <4 x i32> [[A3]], [[B3]]
-; CHECK-NEXT: [[X4:%.*]] = mul <4 x i32> [[A4]], [[B4]]
-; CHECK-NEXT: [[Y1:%.*]] = add <4 x i32> [[X1]], [[C1]]
-; CHECK-NEXT: [[Y2:%.*]] =...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/94954
More information about the llvm-commits
mailing list