[llvm] [SLP] Make getSameOpcode support different instructions if they have same semantics. (PR #112181)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 14 03:49:27 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Han-Kuan Chen (HanKuanChen)
<details>
<summary>Changes</summary>
---
Patch is 41.47 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112181.diff
14 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+207-29)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll (+4-4)
- (modified) llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll (+3-4)
- (modified) llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll (+4-4)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll (+1-3)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll (+11-16)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll (+14-13)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll (+1-3)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll (+1-3)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll (+1-3)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll (+12-7)
- (modified) llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll (+23-13)
- (modified) llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll (+30-32)
- (modified) llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll (+1-3)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 401597af35bdac..fdda87e541ca74 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -818,6 +818,105 @@ struct InstructionsState {
} // end anonymous namespace
+namespace {
+
+/// One (opcode, operands) form that an instruction can be converted to.
+struct InterchangeableInstruction {
+  /// Opcode of this form.
+  unsigned Opcode;
+  /// Operands this form takes, in operand order.
+  SmallVector<Value *> Ops;
+
+  /// Records \p Opcode and initializes Ops from \p Args, which may be an
+  /// operand range or an explicit list of values.
+  template <class... ArgTypes>
+  InterchangeableInstruction(unsigned Opcode, ArgTypes &&...Args)
+      : Opcode(Opcode), Ops{std::forward<ArgTypes>(Args)...} {}
+};
+
+/// Orders forms by opcode only; operands are not compared, so two forms with
+/// the same opcode are equivalent under this ordering. It lives in the same
+/// namespace as the struct so that argument-dependent lookup can find it from
+/// sort() and std::set_intersection.
+bool operator<(const InterchangeableInstruction &LHS,
+               const InterchangeableInstruction &RHS) {
+  return LHS.Opcode < RHS.Opcode;
+}
+
+} // end anonymous namespace
+
+/// \returns a list of interchangeable instructions which \p I can be converted
+/// to, sorted by opcode. The list always contains \p I's own opcode and
+/// operands. Only shl/mul/sub/add whose second operand is a constant integer
+/// (or a splat of one) get additional forms.
+/// e.g.,
+/// x << y -> x * (2^y)   (only when y is smaller than the bit width)
+/// x << 1 -> x * 2
+/// x << 0 -> x * 1 -> x - 0 -> x + 0 -> x & 11...1 -> x | 0
+/// x * 0 -> x & 0
+/// x * -1 -> 0 - x
+/// TODO: support more patterns
+static SmallVector<InterchangeableInstruction, 6>
+getInterchangeableInstruction(Instruction *I) {
+  // PII = Possible Interchangeable Instruction
+  SmallVector<InterchangeableInstruction, 6> PII;
+  unsigned Opcode = I->getOpcode();
+  // An instruction is always interchangeable with itself.
+  PII.emplace_back(Opcode, I->operands());
+  if (!is_contained({Instruction::Shl, Instruction::Mul, Instruction::Sub,
+                     Instruction::Add},
+                    Opcode))
+    return PII;
+  Constant *C;
+  if (match(I, m_BinOp(m_Value(), m_Constant(C)))) {
+    // Accept a scalar constant or a vector that splats a single constant.
+    ConstantInt *V = nullptr;
+    if (auto *CI = dyn_cast<ConstantInt>(C)) {
+      V = CI;
+    } else if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
+      if (auto *CI = dyn_cast_if_present<ConstantInt>(CDV->getSplatValue()))
+        V = CI;
+    }
+    if (!V)
+      return PII;
+    Value *Op0 = I->getOperand(0);
+    Type *Op1Ty = I->getOperand(1)->getType();
+    const APInt &Op1Int = V->getValue();
+    unsigned BitWidth = Op1Int.getBitWidth();
+    Constant *Zero = ConstantInt::get(Op1Ty, APInt::getZero(BitWidth));
+    Constant *UnsignedMax =
+        ConstantInt::get(Op1Ty, APInt::getMaxValue(BitWidth));
+    switch (Opcode) {
+    case Instruction::Shl: {
+      // Shifting by the bit width or more yields poison, so only in-range
+      // amounts have an equivalent multiply. The multiplier 2^y is built as
+      // an APInt so it is exact for every bit width; a plain `1 << y` would
+      // be evaluated in `int` and overflow once y >= 31.
+      if (Op1Int.ult(BitWidth))
+        PII.emplace_back(
+            Instruction::Mul, Op0,
+            ConstantInt::get(Op1Ty, APInt::getOneBitSet(
+                                        BitWidth, Op1Int.getZExtValue())));
+      if (Op1Int.isZero()) {
+        PII.emplace_back(Instruction::Sub, Op0, Zero);
+        PII.emplace_back(Instruction::Add, Op0, Zero);
+        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
+        PII.emplace_back(Instruction::Or, Op0, Zero);
+      }
+      break;
+    }
+    case Instruction::Mul: {
+      // Classify the multiplier with APInt predicates rather than
+      // getSExtValue(), which asserts on values needing more than 64 bits.
+      // All-ones is tested before one so that an i1 multiplier of 1 (which
+      // sign-extends to -1) keeps being treated as -1.
+      if (Op1Int.isZero()) {
+        PII.emplace_back(Instruction::And, Op0, Zero);
+      } else if (Op1Int.isAllOnes()) {
+        PII.emplace_back(Instruction::Sub, Zero, Op0);
+      } else if (Op1Int.isOne()) {
+        PII.emplace_back(Instruction::Sub, Op0, Zero);
+        PII.emplace_back(Instruction::Add, Op0, Zero);
+        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
+        PII.emplace_back(Instruction::Or, Op0, Zero);
+      }
+      break;
+    }
+    case Instruction::Sub:
+      if (Op1Int.isZero()) {
+        PII.emplace_back(Instruction::Add, Op0, Zero);
+        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
+        PII.emplace_back(Instruction::Or, Op0, Zero);
+      }
+      break;
+    case Instruction::Add:
+      if (Op1Int.isZero()) {
+        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
+        PII.emplace_back(Instruction::Or, Op0, Zero);
+      }
+      break;
+    }
+  }
+  // std::set_intersection requires a sorted range. The early returns above
+  // hand back a single-element list, which is trivially sorted.
+  sort(PII);
+  return PII;
+}
+
/// \returns true if \p Opcode is allowed as part of the main/alternate
/// instruction for SLP vectorization.
///
@@ -922,18 +1021,54 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
if (!isTriviallyVectorizable(BaseID) && BaseMappings.empty())
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
}
+ // Currently, this is only used for binary ops.
+ // TODO: support all instructions
+ SmallVector<InterchangeableInstruction> InterchangeableOpcode =
+ getInterchangeableInstruction(cast<Instruction>(VL[BaseIndex]));
+ SmallVector<InterchangeableInstruction> AlternateInterchangeableOpcode;
+ auto UpdateInterchangeableOpcode =
+ [](SmallVector<InterchangeableInstruction> &LHS,
+ ArrayRef<InterchangeableInstruction> RHS) {
+ SmallVector<InterchangeableInstruction> NewInterchangeableOpcode;
+ std::set_intersection(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
+ std::back_inserter(NewInterchangeableOpcode));
+ if (NewInterchangeableOpcode.empty())
+ return false;
+ LHS = std::move(NewInterchangeableOpcode);
+ return true;
+ };
for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
auto *I = cast<Instruction>(VL[Cnt]);
unsigned InstOpcode = I->getOpcode();
if (IsBinOp && isa<BinaryOperator>(I)) {
- if (InstOpcode == Opcode || InstOpcode == AltOpcode)
+ SmallVector<InterchangeableInstruction> ThisInterchangeableOpcode(
+ getInterchangeableInstruction(I));
+ if (UpdateInterchangeableOpcode(InterchangeableOpcode,
+ ThisInterchangeableOpcode))
continue;
- if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
- isValidForAlternation(Opcode)) {
- AltOpcode = InstOpcode;
- AltIndex = Cnt;
+ if (AlternateInterchangeableOpcode.empty()) {
+ InterchangeableOpcode.erase(
+ std::remove_if(InterchangeableOpcode.begin(),
+ InterchangeableOpcode.end(),
+ [](const InterchangeableInstruction &I) {
+ return !isValidForAlternation(I.Opcode);
+ }),
+ InterchangeableOpcode.end());
+ ThisInterchangeableOpcode.erase(
+ std::remove_if(ThisInterchangeableOpcode.begin(),
+ ThisInterchangeableOpcode.end(),
+ [](const InterchangeableInstruction &I) {
+ return !isValidForAlternation(I.Opcode);
+ }),
+ ThisInterchangeableOpcode.end());
+ if (InterchangeableOpcode.empty() || ThisInterchangeableOpcode.empty())
+ return InstructionsState(VL[BaseIndex], nullptr, nullptr);
+ AlternateInterchangeableOpcode = std::move(ThisInterchangeableOpcode);
continue;
}
+ if (UpdateInterchangeableOpcode(AlternateInterchangeableOpcode,
+ ThisInterchangeableOpcode))
+ continue;
} else if (IsCastOp && isa<CastInst>(I)) {
Value *Op0 = IBase->getOperand(0);
Type *Ty0 = Op0->getType();
@@ -1027,6 +1162,22 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
}
+ if (IsBinOp) {
+ auto FindOp =
+ [&](const SmallVector<InterchangeableInstruction> &CandidateOp) {
+ for (Value *V : VL)
+ for (const InterchangeableInstruction &I : CandidateOp)
+ if (cast<Instruction>(V)->getOpcode() == I.Opcode)
+ return cast<Instruction>(V);
+ llvm_unreachable(
+ "Cannot find the candidate instruction for InstructionsState.");
+ };
+ Instruction *MainOp = FindOp(InterchangeableOpcode);
+ Instruction *AltOp = AlternateInterchangeableOpcode.empty()
+ ? MainOp
+ : FindOp(AlternateInterchangeableOpcode);
+ return InstructionsState(VL[BaseIndex], MainOp, AltOp);
+ }
return InstructionsState(VL[BaseIndex], cast<Instruction>(VL[BaseIndex]),
cast<Instruction>(VL[AltIndex]));
}
@@ -2318,24 +2469,41 @@ class BoUpSLP {
: cast<Instruction>(VL[0])->getNumOperands();
OpsVec.resize(NumOperands);
unsigned NumLanes = VL.size();
- for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+ InstructionsState S = getSameOpcode(VL, TLI);
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
OpsVec[OpIdx].resize(NumLanes);
- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
- assert(isa<Instruction>(VL[Lane]) && "Expected instruction");
- // Our tree has just 3 nodes: the root and two operands.
- // It is therefore trivial to get the APO. We only need to check the
- // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
- // RHS operand. The LHS operand of both add and sub is never attached
- // to an inversese operation in the linearized form, therefore its APO
- // is false. The RHS is true only if VL[Lane] is an inverse operation.
-
- // Since operand reordering is performed on groups of commutative
- // operations or alternating sequences (e.g., +, -), we can safely
- // tell the inverse operations by checking commutativity.
- bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
+ for (auto [I, V] : enumerate(VL)) {
+ assert(isa<Instruction>(V) && "Expected instruction");
+ SmallVector<InterchangeableInstruction> IIList =
+ getInterchangeableInstruction(cast<Instruction>(V));
+ Value *SelectedOp;
+ auto Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
+ return II.Opcode == S.MainOp->getOpcode();
+ });
+ if (Iter == IIList.end()) {
+ Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
+ return II.Opcode == S.AltOp->getOpcode();
+ });
+ SelectedOp = S.AltOp;
+ } else {
+ SelectedOp = S.MainOp;
+ }
+ assert(Iter != IIList.end() &&
+ "Cannot find an interchangeable instruction.");
+ // Our tree has just 3 nodes: the root and two operands.
+ // It is therefore trivial to get the APO. We only need to check the
+ // opcode of V and whether the operand at OpIdx is the LHS or RHS
+ // operand. The LHS operand of both add and sub is never attached to an
+ // inverse operation in the linearized form, therefore its APO is
+ // false. The RHS is true only if V is an inverse operation.
+
+ // Since operand reordering is performed on groups of commutative
+ // operations or alternating sequences (e.g., +, -), we can safely
+ // tell the inverse operations by checking commutativity.
+ bool IsInverseOperation = !isCommutative(cast<Instruction>(SelectedOp));
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
bool APO = (OpIdx == 0) ? false : IsInverseOperation;
- OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
- APO, false};
+ OpsVec[OpIdx][I] = {Iter->Ops[OpIdx], APO, false};
}
}
}
@@ -3227,15 +3395,25 @@ class BoUpSLP {
auto *I0 = cast<Instruction>(Scalars[0]);
Operands.resize(I0->getNumOperands());
unsigned NumLanes = Scalars.size();
- for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
- OpIdx != NumOperands; ++OpIdx) {
+ unsigned NumOperands = I0->getNumOperands();
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
Operands[OpIdx].resize(NumLanes);
- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
- auto *I = cast<Instruction>(Scalars[Lane]);
- assert(I->getNumOperands() == NumOperands &&
- "Expected same number of operands");
- Operands[OpIdx][Lane] = I->getOperand(OpIdx);
- }
+ for (auto [I, V] : enumerate(Scalars)) {
+ SmallVector<InterchangeableInstruction> IIList =
+ getInterchangeableInstruction(cast<Instruction>(V));
+ auto Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
+ return II.Opcode == MainOp->getOpcode();
+ });
+ if (Iter == IIList.end())
+ Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
+ return II.Opcode == AltOp->getOpcode();
+ });
+ assert(Iter != IIList.end() &&
+ "Cannot find an interchangeable instruction.");
+ assert(Iter->Ops.size() == NumOperands &&
+ "Expected same number of operands");
+ for (auto [J, Op] : enumerate(Iter->Ops))
+ Operands[J][I] = Op;
}
}
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
index c18811a35c1eeb..c7c999bb572851 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
@@ -314,10 +314,10 @@ define void @store_try_reorder(ptr %dst) {
;
; POW2-ONLY-LABEL: @store_try_reorder(
; POW2-ONLY-NEXT: entry:
-; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
-; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
-; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
-; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
+; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
+; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
+; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
+; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
; POW2-ONLY-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
index 3fa42047162e45..7bc03e7c7755b4 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
@@ -7,13 +7,12 @@ define void @test(ptr %a, i64 %0) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i32 0
; CHECK-NEXT: br label %[[BB:.*]]
; CHECK: [[BB]]:
-; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
-; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = or disjoint <2 x i64> [[TMP3]], <i64 1, i64 0>
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]]
+; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = extractelement <2 x ptr> [[TMP6]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x double> poison)
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
index 308d0e27f1ea89..e158c2a3ed87ea 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
@@ -324,10 +324,10 @@ define void @store_try_reorder(ptr %dst) {
;
; POW2-ONLY-LABEL: @store_try_reorder(
; POW2-ONLY-NEXT: entry:
-; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
-; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
-; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
-; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
+; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
+; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
+; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
+; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
; POW2-ONLY-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
index d388fd17925a16..d2e70f05204d79 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
@@ -10,9 +10,7 @@ define i32 @foo(ptr nocapture %A, i32 %n) {
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @bar()
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], <i32 9, i32 9, i32 9, i32 9>
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
; CHECK-NEXT: ret i32 undef
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
index 889f5a95c81d69..7af0c64f187480 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
@@ -4,22 +4,17 @@
define void @test(ptr %0, ptr %1, ptr %2) {
; CHECK-LABEL: @test(
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 4
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr [[TMP1:%.*]], align 4
-; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
-; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = sub <4 x i32> <i32 0, i32 0, i32 undef, i32 0>, [[TMP8]]
-; CHECK-NEXT: [[TMP12:%.*]] = sub <4 x i32> [[TMP11]], [[TMP10]]
-; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]]
-; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[TMP13]], <i32 0, i32 0, i32 1, i32 0>
-; CHECK-NEXT: [[TMP15:%.*]] = sub <4 x i32> [[TMP13]], <i32 0, i32 0, i32 1, i32 0>
-; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> <i32 2, i32 0, i32 1, i32 7>
-; CHECK-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP16]], zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = sub <4 x i32> [[TMP16]], zeroinitializer
-; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP20:%.*]] = add <4 x i32> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP21:%.*]] = sub <4 x i32> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP2:%.*]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[TMP1:%.*]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = sub <4 x i32> <i32 0, i32 0, i32 undef, i32 0>, [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[TMP8]], [[TMP7]]
+; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP9]], [[TMP5]]
+; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i32> <i32 0, i32 0, i32 1, i32 0>, [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i32> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/112181
More information about the llvm-commits
mailing list