[PATCH] D114316: [X86][Costmodel] Now that `getReplicationShuffleCost()` is good, update `getInterleavedMemoryOpCostAVX512()`
Roman Lebedev via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 22 03:51:06 PST 2021
lebedev.ri updated this revision to Diff 388864.
lebedev.ri added a comment.
Rebased.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D114316/new/
https://reviews.llvm.org/D114316
Files:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll
Index: llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll
===================================================================
--- llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll
+++ llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll
@@ -43,13 +43,13 @@
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 31 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2
define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) {
entry:
@@ -110,13 +110,13 @@
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 31 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2
define void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) {
entry:
Index: llvm/lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -5285,7 +5285,6 @@
unsigned VF = VecTy->getNumElements() / Factor;
MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
- // FIXME: this is the most conservative estimate for the mask cost.
InstructionCost MaskCost;
if (UseMaskForCond || UseMaskForGaps) {
APInt DemandedLoadStoreElts = APInt::getZero(VecTy->getNumElements());
@@ -5295,10 +5294,10 @@
DemandedLoadStoreElts.setBit(Index + Elm * Factor);
}
- Type *I8Type = Type::getInt8Ty(VecTy->getContext());
+ Type *I1Type = Type::getInt1Ty(VecTy->getContext());
MaskCost = getReplicationShuffleCost(
- I8Type, Factor, VF,
+ I1Type, Factor, VF,
UseMaskForGaps ? DemandedLoadStoreElts
: APInt::getAllOnes(VecTy->getNumElements()),
CostKind);
@@ -5309,7 +5308,7 @@
// memory access, we need to account for the cost of And-ing the two masks
// inside the loop.
if (UseMaskForGaps) {
- auto *MaskVT = FixedVectorType::get(I8Type, VecTy->getNumElements());
+ auto *MaskVT = FixedVectorType::get(I1Type, VecTy->getNumElements());
MaskCost += getArithmeticInstrCost(BinaryOperator::And, MaskVT, CostKind);
}
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D114316.388864.patch
Type: text/x-patch
Size: 4651 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211122/3d543564/attachment.bin>
More information about the llvm-commits
mailing list