[llvm] [VectorCombine] Fold insert(binop(x,y),binop(a,b),idx) --> binop(insert(x,a,idx),insert(x,a,idx)) (PR #124909)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 30 07:29:57 PST 2025
- Previous message: [llvm] [VectorCombine] Fold insert(binop(x,y),binop(a,b),idx) --> binop(insert(x,a,idx),insert(x,a,idx)) (PR #124909)
- Next message: [llvm] [VectorCombine] Fold insert(binop(x,y),binop(a,b),idx) --> binop(insert(x,a,idx),insert(x,a,idx)) (PR #124909)
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
Add foldInsExtBinop fold to cleanup missed vectorization cases which can happen on targets with cheap insert/extract instructions which prevent foldExtractExtract (binop(extract(x),extract(y)) -> extract(binop(x,shuffle(y)))) from helping with the merge.
---
Patch is 72.57 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124909.diff
5 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+60)
- (modified) llvm/test/Transforms/PhaseOrdering/X86/hadd.ll (+53-127)
- (modified) llvm/test/Transforms/PhaseOrdering/X86/hsub.ll (+53-126)
- (modified) llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll (+3-7)
- (modified) llvm/test/Transforms/VectorCombine/X86/insert-binop-vector.ll (+41-26)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 59920b5a4dd20a..09e4b8cc17fbbc 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -108,6 +108,7 @@ class VectorCombine {
Instruction &I);
bool foldExtractExtract(Instruction &I);
bool foldInsExtFNeg(Instruction &I);
+ bool foldInsExtBinop(Instruction &I);
bool foldInsExtVectorToShuffle(Instruction &I);
bool foldBitcastShuffle(Instruction &I);
bool scalarizeBinopOrCmp(Instruction &I);
@@ -738,6 +739,64 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
return true;
}
+/// Try to fold insert(binop(x,y),binop(a,b),idx)
+/// --> binop(insert(x,a,idx),insert(x,a,idx))
+bool VectorCombine::foldInsExtBinop(Instruction &I) {
+ BinaryOperator *VecBinOp, *SclBinOp;
+ uint64_t Index;
+ if (!match(&I,
+ m_InsertElt(m_OneUse(m_BinOp(VecBinOp)),
+ m_OneUse(m_BinOp(SclBinOp)), m_ConstantInt(Index))))
+ return false;
+
+ // TODO: Add support for addlike etc.
+ Instruction::BinaryOps BinOpcode = VecBinOp->getOpcode();
+ if (BinOpcode != SclBinOp->getOpcode())
+ return false;
+
+ auto *ResultTy = dyn_cast<FixedVectorType>(I.getType());
+ if (!ResultTy)
+ return false;
+
+ // TODO: Attempt to detect m_ExtractElt for scalar operands and convert to
+ // shuffle?
+
+ InstructionCost OldCost = TTI.getInstructionCost(&I, CostKind) +
+ TTI.getInstructionCost(VecBinOp, CostKind) +
+ TTI.getInstructionCost(SclBinOp, CostKind);
+ InstructionCost NewCost =
+ TTI.getArithmeticInstrCost(BinOpcode, ResultTy, CostKind) +
+ TTI.getVectorInstrCost(Instruction::InsertElement, ResultTy, CostKind,
+ Index, VecBinOp->getOperand(0),
+ SclBinOp->getOperand(0)) +
+ TTI.getVectorInstrCost(Instruction::InsertElement, ResultTy, CostKind,
+ Index, VecBinOp->getOperand(1),
+ SclBinOp->getOperand(1));
+
+ LLVM_DEBUG(dbgs() << "Found an insertion of two binops: " << I
+ << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
+ << "\n");
+ if (NewCost > OldCost)
+ return false;
+
+ Value *NewIns0 = Builder.CreateInsertElement(VecBinOp->getOperand(0),
+ SclBinOp->getOperand(0), Index);
+ Value *NewIns1 = Builder.CreateInsertElement(VecBinOp->getOperand(1),
+ SclBinOp->getOperand(1), Index);
+ Value *NewBO = Builder.CreateBinOp(BinOpcode, NewIns0, NewIns1);
+
+ // Intersect flags from the old binops.
+ if (auto *NewInst = dyn_cast<Instruction>(NewBO)) {
+ NewInst->copyIRFlags(VecBinOp);
+ NewInst->andIRFlags(SclBinOp);
+ }
+
+ Worklist.pushValue(NewIns0);
+ Worklist.pushValue(NewIns1);
+ replaceValue(I, *NewBO);
+ return true;
+}
+
/// If this is a bitcast of a shuffle, try to bitcast the source vector to the
/// destination type followed by shuffle. This can enable further transforms by
/// moving bitcasts or shuffles together.
@@ -3206,6 +3265,7 @@ bool VectorCombine::run() {
switch (Opcode) {
case Instruction::InsertElement:
MadeChange |= foldInsExtFNeg(I);
+ MadeChange |= foldInsExtBinop(I);
MadeChange |= foldInsExtVectorToShuffle(I);
break;
case Instruction::ShuffleVector:
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll b/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
index 9b139a4a4e234c..28b48bd3ce6d9e 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
@@ -272,19 +272,17 @@ define <16 x i16> @add_v16i16_0123456789ABCDEF(<16 x i16> %a, <16 x i16> %b) {
define <16 x i16> @add_v16i16_0123u56789uBCDEF(<16 x i16> %a, <16 x i16> %b) {
; SSE2-LABEL: @add_v16i16_0123u56789uBCDEF(
-; SSE2-NEXT: [[A8:%.*]] = extractelement <16 x i16> [[A:%.*]], i64 8
-; SSE2-NEXT: [[A9:%.*]] = extractelement <16 x i16> [[A]], i64 9
-; SSE2-NEXT: [[A89:%.*]] = add i16 [[A8]], [[A9]]
; SSE2-NEXT: [[BC:%.*]] = extractelement <16 x i16> [[B:%.*]], i64 12
; SSE2-NEXT: [[BD:%.*]] = extractelement <16 x i16> [[B]], i64 13
; SSE2-NEXT: [[BE:%.*]] = extractelement <16 x i16> [[B]], i64 14
; SSE2-NEXT: [[BF:%.*]] = extractelement <16 x i16> [[B]], i64 15
; SSE2-NEXT: [[BCD:%.*]] = add i16 [[BC]], [[BD]]
; SSE2-NEXT: [[BEF:%.*]] = add i16 [[BE]], [[BF]]
-; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE2-NEXT: [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
-; SSE2-NEXT: [[HADD8:%.*]] = insertelement <16 x i16> [[TMP3]], i16 [[A89]], i64 8
+; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 24, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 25, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: [[HADD8:%.*]] = add <16 x i16> [[TMP3]], [[TMP7]]
; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 10, i32 poison, i32 14, i32 24, i32 26, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 11, i32 poison, i32 15, i32 25, i32 27, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[TMP6:%.*]] = add <16 x i16> [[TMP4]], [[TMP5]]
@@ -295,22 +293,16 @@ define <16 x i16> @add_v16i16_0123u56789uBCDEF(<16 x i16> %a, <16 x i16> %b) {
; SSE2-NEXT: ret <16 x i16> [[RESULT]]
;
; SSE4-LABEL: @add_v16i16_0123u56789uBCDEF(
-; SSE4-NEXT: [[A8:%.*]] = extractelement <16 x i16> [[A:%.*]], i64 8
-; SSE4-NEXT: [[A9:%.*]] = extractelement <16 x i16> [[A]], i64 9
-; SSE4-NEXT: [[A89:%.*]] = add i16 [[A8]], [[A9]]
-; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
-; SSE4-NEXT: [[HADD8:%.*]] = insertelement <16 x i16> [[TMP3]], i16 [[A89]], i64 8
-; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> <i32 10, i32 poison, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> <i32 11, i32 poison, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP6:%.*]] = add <16 x i16> [[TMP4]], [[TMP5]]
-; SSE4-NEXT: [[HADDB2:%.*]] = shufflevector <16 x i16> [[HADD8]], <16 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 16, i32 poison, i32 18, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[B]], <16 x i16> poison, <16 x i32> <i32 8, i32 10, i32 12, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[B]], <16 x i16> poison, <16 x i32> <i32 9, i32 11, i32 13, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 24, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 25, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 26, i32 poison, i32 30, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <16 x i16> [[TMP4]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 27, i32 poison, i32 31, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[TMP5]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 9, i32 poison, i32 11, i32 24, i32 26, i32 28, i32 30>
+; SSE4-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[TMP6]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 9, i32 poison, i32 11, i32 25, i32 27, i32 29, i32 31>
; SSE4-NEXT: [[TMP9:%.*]] = add <16 x i16> [[TMP7]], [[TMP8]]
-; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <16 x i16> [[HADDB2]], <16 x i16> [[TMP9]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 9, i32 poison, i32 11, i32 16, i32 17, i32 18, i32 19>
-; SSE4-NEXT: ret <16 x i16> [[RESULT]]
+; SSE4-NEXT: ret <16 x i16> [[TMP9]]
;
; AVX2-LABEL: @add_v16i16_0123u56789uBCDEF(
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 18, i32 20, i32 22, i32 9, i32 10, i32 poison, i32 14, i32 24, i32 26, i32 28, i32 30>
@@ -394,19 +386,17 @@ define <16 x i16> @add_v16i16_0123u56789uBCDEF(<16 x i16> %a, <16 x i16> %b) {
define <16 x i16> @add_v16i16_FEuCBA98765432u0(<16 x i16> %a, <16 x i16> %b) {
; SSE2-LABEL: @add_v16i16_FEuCBA98765432u0(
-; SSE2-NEXT: [[A8:%.*]] = extractelement <16 x i16> [[A:%.*]], i64 8
-; SSE2-NEXT: [[A9:%.*]] = extractelement <16 x i16> [[A]], i64 9
-; SSE2-NEXT: [[A89:%.*]] = add i16 [[A8]], [[A9]]
; SSE2-NEXT: [[BC:%.*]] = extractelement <16 x i16> [[B:%.*]], i64 12
; SSE2-NEXT: [[BD:%.*]] = extractelement <16 x i16> [[B]], i64 13
; SSE2-NEXT: [[BE:%.*]] = extractelement <16 x i16> [[B]], i64 14
; SSE2-NEXT: [[BF:%.*]] = extractelement <16 x i16> [[B]], i64 15
; SSE2-NEXT: [[BCD:%.*]] = add i16 [[BC]], [[BD]]
; SSE2-NEXT: [[BEF:%.*]] = add i16 [[BE]], [[BF]]
-; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE2-NEXT: [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
-; SSE2-NEXT: [[HADD8:%.*]] = insertelement <16 x i16> [[TMP3]], i16 [[A89]], i64 8
+; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 25, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT: [[HADD8:%.*]] = add <16 x i16> [[TMP3]], [[TMP7]]
; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 10, i32 12, i32 14, i32 24, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 11, i32 13, i32 15, i32 25, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[TMP6:%.*]] = add <16 x i16> [[TMP4]], [[TMP5]]
@@ -417,21 +407,17 @@ define <16 x i16> @add_v16i16_FEuCBA98765432u0(<16 x i16> %a, <16 x i16> %b) {
; SSE2-NEXT: ret <16 x i16> [[RESULT]]
;
; SSE4-LABEL: @add_v16i16_FEuCBA98765432u0(
-; SSE4-NEXT: [[A8:%.*]] = extractelement <16 x i16> [[A:%.*]], i64 8
-; SSE4-NEXT: [[A9:%.*]] = extractelement <16 x i16> [[A]], i64 9
-; SSE4-NEXT: [[A89:%.*]] = add i16 [[A8]], [[A9]]
-; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
-; SSE4-NEXT: [[HADD8:%.*]] = insertelement <16 x i16> [[TMP3]], i16 [[A89]], i64 8
-; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> <i32 11, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> <i32 10, i32 13, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 25, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 poison, i32 27, i32 28, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 poison, i32 26, i32 29, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP6:%.*]] = add <16 x i16> [[TMP4]], [[TMP5]]
-; SSE4-NEXT: [[HADDA2:%.*]] = shufflevector <16 x i16> [[HADD8]], <16 x i16> [[TMP6]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 poison, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 poison, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP9:%.*]] = add <16 x i16> [[TMP7]], [[TMP8]]
-; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP9]], <16 x i16> [[HADDA2]], <16 x i32> <i32 4, i32 3, i32 poison, i32 1, i32 0, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
+; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP9]], <16 x i16> [[TMP6]], <16 x i32> <i32 3, i32 2, i32 poison, i32 1, i32 0, i32 27, i32 26, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
; SSE4-NEXT: ret <16 x i16> [[RESULT]]
;
; AVX2-LABEL: @add_v16i16_FEuCBA98765432u0(
@@ -826,31 +812,11 @@ define <8 x i32> @add_v8i32_01234567(<8 x i32> %a, <8 x i32> %b) {
}
define <8 x i32> @add_v8i32_01234u67(<8 x i32> %a, <8 x i32> %b) {
-; SSE2-LABEL: @add_v8i32_01234u67(
-; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 poison, i32 13, i32 14>
-; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 poison, i32 12, i32 15>
-; SSE2-NEXT: [[RESULT:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
-; SSE2-NEXT: ret <8 x i32> [[RESULT]]
-;
-; SSE4-LABEL: @add_v8i32_01234u67(
-; SSE4-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A:%.*]], i64 4
-; SSE4-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i64 5
-; SSE4-NEXT: [[A45:%.*]] = add i32 [[A4]], [[A5]]
-; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/124909
- Previous message: [llvm] [VectorCombine] Fold insert(binop(x,y),binop(a,b),idx) --> binop(insert(x,a,idx),insert(x,a,idx)) (PR #124909)
- Next message: [llvm] [VectorCombine] Fold insert(binop(x,y),binop(a,b),idx) --> binop(insert(x,a,idx),insert(x,a,idx)) (PR #124909)
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the llvm-commits
mailing list