[llvm] e3f8c22 - [VectorCombine] foldInsExtVectorToShuffle - inserting into a poison base vector can be modelled as a single src shuffle
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 23 07:49:45 PST 2024
Author: Simon Pilgrim
Date: 2024-12-23T15:49:17Z
New Revision: e3f8c229f5710b52d18300b561925e556407ebee
URL: https://github.com/llvm/llvm-project/commit/e3f8c229f5710b52d18300b561925e556407ebee
DIFF: https://github.com/llvm/llvm-project/commit/e3f8c229f5710b52d18300b561925e556407ebee.diff
LOG: [VectorCombine] foldInsExtVectorToShuffle - inserting into a poison base vector can be modelled as a single src shuffle
We already canonicalized an undef base vector to the RHS to improve further folding, this extends this to improve the shuffle cost estimate of the single src shuffle
Added:
Modified:
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index f1b80a5b69d420..ecbc13d489eb37 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3025,21 +3025,32 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
if (ExtIdx >= NumElts || InsIdx >= NumElts)
return false;
- SmallVector<int> Mask(NumElts, 0);
- std::iota(Mask.begin(), Mask.end(), 0);
- Mask[InsIdx] = ExtIdx + NumElts;
+ // Insertion into poison is a cheaper single operand shuffle.
+ TargetTransformInfo::ShuffleKind SK;
+ SmallVector<int> Mask(NumElts, PoisonMaskElem);
+ if (isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
+ SK = TargetTransformInfo::SK_PermuteSingleSrc;
+ Mask[InsIdx] = ExtIdx;
+ std::swap(DstVec, SrcVec);
+ } else {
+ SK = TargetTransformInfo::SK_PermuteTwoSrc;
+ std::iota(Mask.begin(), Mask.end(), 0);
+ Mask[InsIdx] = ExtIdx + NumElts;
+ }
+
// Cost
auto *Ins = cast<InsertElementInst>(&I);
auto *Ext = cast<ExtractElementInst>(I.getOperand(1));
-
- InstructionCost OldCost =
- TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx) +
+ InstructionCost InsCost =
TTI.getVectorInstrCost(*Ins, VecTy, CostKind, InsIdx);
+ InstructionCost ExtCost =
+ TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx);
+ InstructionCost OldCost = ExtCost + InsCost;
- InstructionCost NewCost = TTI.getShuffleCost(
- TargetTransformInfo::SK_PermuteTwoSrc, VecTy, Mask, CostKind);
+ InstructionCost NewCost = TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0,
+ nullptr, {DstVec, SrcVec});
if (!Ext->hasOneUse())
- NewCost += TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx);
+ NewCost += ExtCost;
LLVM_DEBUG(dbgs() << "Found a insert/extract shuffle-like pair : " << I
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll b/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
index 0a3599b7e7ff6f..798824bce4dac2 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
@@ -80,43 +80,28 @@ define <8 x i16> @add_v8i16_u1234567(<8 x i16> %a, <8 x i16> %b) {
; SSE4-LABEL: @add_v8i16_u1234567(
; SSE4-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP1:%.*]] = add <8 x i16> [[A]], [[SHIFT]]
-; SSE4-NEXT: [[HADD1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 4, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
-; SSE4-NEXT: [[HADD32:%.*]] = shufflevector <8 x i16> [[HADD1]], <8 x i16> [[TMP4]], <8 x i32> <i32 poison, i32 1, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT: [[HADD32:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP4]], <8 x i32> <i32 poison, i32 2, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP7:%.*]] = add <8 x i16> [[TMP5]], [[TMP6]]
; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <8 x i16> [[HADD32]], <8 x i16> [[TMP7]], <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE4-NEXT: ret <8 x i16> [[RESULT]]
;
-; AVX2-LABEL: @add_v8i16_u1234567(
-; AVX2-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX2-NEXT: [[TMP1:%.*]] = add <8 x i16> [[A]], [[SHIFT]]
-; AVX2-NEXT: [[HADD1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 4, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX2-NEXT: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
-; AVX2-NEXT: [[HADD32:%.*]] = shufflevector <8 x i16> [[HADD1]], <8 x i16> [[TMP4]], <8 x i32> <i32 poison, i32 1, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX2-NEXT: [[TMP7:%.*]] = add <8 x i16> [[TMP5]], [[TMP6]]
-; AVX2-NEXT: [[RESULT:%.*]] = shufflevector <8 x i16> [[HADD32]], <8 x i16> [[TMP7]], <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; AVX2-NEXT: ret <8 x i16> [[RESULT]]
-;
-; AVX512-LABEL: @add_v8i16_u1234567(
-; AVX512-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX512-NEXT: [[TMP1:%.*]] = add <8 x i16> [[A]], [[SHIFT]]
-; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 4, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX512-NEXT: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
-; AVX512-NEXT: [[HADD32:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP4]], <8 x i32> <i32 poison, i32 2, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX512-NEXT: [[TMP7:%.*]] = add <8 x i16> [[TMP5]], [[TMP6]]
-; AVX512-NEXT: [[RESULT:%.*]] = shufflevector <8 x i16> [[HADD32]], <8 x i16> [[TMP7]], <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; AVX512-NEXT: ret <8 x i16> [[RESULT]]
+; AVX-LABEL: @add_v8i16_u1234567(
+; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[TMP1:%.*]] = add <8 x i16> [[A]], [[SHIFT]]
+; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 4, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
+; AVX-NEXT: [[HADD32:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP4]], <8 x i32> <i32 poison, i32 2, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[TMP6:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[TMP7:%.*]] = add <8 x i16> [[TMP5]], [[TMP6]]
+; AVX-NEXT: [[RESULT:%.*]] = shufflevector <8 x i16> [[HADD32]], <8 x i16> [[TMP7]], <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; AVX-NEXT: ret <8 x i16> [[RESULT]]
;
%a0 = extractelement <8 x i16> %a, i32 0
%a1 = extractelement <8 x i16> %a, i32 1
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
index 80a21cef4e38ad..800f57646a3e13 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
@@ -488,7 +488,7 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
; AVX-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; AVX-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
-; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 4, i32 2, i32 6, i32 7>
+; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 poison, i32 2, i32 poison, i32 poison>
; AVX-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; AVX-NEXT: ret <4 x float> [[V3]]
More information about the llvm-commits
mailing list