[llvm] 7e73c2a - [X86][Costmodel] `getInterleavedMemoryOpCostAVX512()`: masked load can not be folded into a shuffle
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 29 07:37:38 PST 2021
Author: Roman Lebedev
Date: 2021-11-29T18:37:07+03:00
New Revision: 7e73c2a66a8bb73b80aedc94c4e58598ac87e9d5
URL: https://github.com/llvm/llvm-project/commit/7e73c2a66a8bb73b80aedc94c4e58598ac87e9d5
DIFF: https://github.com/llvm/llvm-project/commit/7e73c2a66a8bb73b80aedc94c4e58598ac87e9d5.diff
LOG: [X86][Costmodel] `getInterleavedMemoryOpCostAVX512()`: masked load can not be folded into a shuffle
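The mask on a shuffle applies to the shuffle's output elements, not to its (memory) input, so a masked load cannot be folded into the shuffle and must be costed as a separate, unfolded load.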
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D114697
Added:
Modified:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index a0332b7e3066e..bf2f95017451c 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -5275,7 +5275,8 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
LegalVT.getVectorNumElements());
InstructionCost MemOpCost;
- if (UseMaskForCond || UseMaskForGaps)
+ bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps;
+ if (UseMaskedMemOp)
MemOpCost = getMaskedMemoryOpCost(Opcode, SingleMemOpTy, Alignment,
AddressSpace, CostKind);
else
@@ -5286,7 +5287,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
InstructionCost MaskCost;
- if (UseMaskForCond || UseMaskForGaps) {
+ if (UseMaskedMemOp) {
APInt DemandedLoadStoreElts = APInt::getZero(VecTy->getNumElements());
for (unsigned Index : Indices) {
assert(Index < Factor && "Invalid index for interleaved memory op");
@@ -5349,9 +5350,10 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
NumOfLoadsInInterleaveGrp;
// About a half of the loads may be folded in shuffles when we have only
- // one result. If we have more than one result, we do not fold loads at all.
+ // one result. If we have more than one result, or the loads are masked,
+ // we do not fold loads at all.
unsigned NumOfUnfoldedLoads =
- NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
+ UseMaskedMemOp || NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
// Get a number of shuffle operations per result.
unsigned NumOfShufflesPerResult =
diff --git a/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll b/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll
index 42591c8661ba9..b5a28ea1ab636 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll
@@ -172,10 +172,10 @@ for.end:
; ENABLED_MASKED_STRIDED: LV: Checking a loop in "test"
;
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 4 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 7 for VF 2 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 9 for VF 4 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 9 for VF 8 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
define void @test(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readnone %y) {
entry:
More information about the llvm-commits mailing list