[llvm] 371412e - [COST]Fix crash for non-power-2 vector shuffle mask.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 29 07:29:43 PDT 2022
Author: Alexey Bataev
Date: 2022-04-29T07:28:07-07:00
New Revision: 371412e065a63107d5d79330da6757ff693d91cc
URL: https://github.com/llvm/llvm-project/commit/371412e065a63107d5d79330da6757ff693d91cc
DIFF: https://github.com/llvm/llvm-project/commit/371412e065a63107d5d79330da6757ff693d91cc.diff
LOG: [COST]Fix crash for non-power-2 vector shuffle mask.
Need to normalize the mask to avoid possible crashes when estimating the
cost of very long shuffles whose masks have a non-power-of-2 number of
elements.
Added:
Modified:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 29e66ac90b66..d6291a1bb0ce 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1238,15 +1238,18 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// copy of the previous destination register (the cost is
// TTI::TCC_Basic). If the source register is just reused, the cost for
// this operation is 0.
- unsigned NormalizedVF = LT.second.getVectorNumElements() * NumOfSrcs;
+ unsigned E = *NumOfDests.getValue();
+ unsigned NormalizedVF =
+ LegalVT.getVectorNumElements() * std::max(NumOfSrcs, E);
+ unsigned NumOfSrcRegs = NormalizedVF / LegalVT.getVectorNumElements();
+ unsigned NumOfDestRegs = NormalizedVF / LegalVT.getVectorNumElements();
SmallVector<int> NormalizedMask(NormalizedVF, UndefMaskElem);
copy(Mask, NormalizedMask.begin());
- unsigned E = *NumOfDests.getValue();
unsigned PrevSrcReg = 0;
ArrayRef<int> PrevRegMask;
InstructionCost Cost = 0;
processShuffleMasks(
- NormalizedMask, NumOfSrcs, E, E, []() {},
+ NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
[this, SingleOpTy, &PrevSrcReg, &PrevRegMask,
&Cost](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
if (!ShuffleVectorInst::isIdentityMask(RegMask)) {
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2.ll b/llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2.ll
index df0d12a30652..365a678815f1 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2.ll
@@ -1,13 +1,31 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s -check-prefixes=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s -check-prefixes=AVX
define void @test() {
-; CHECK-LABEL: 'test'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %matins.2.2 = shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> <i32 0, i32 3, i32 6, i32 1, i32 4, i32 7, i32 2, i32 5, i32 8>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE-LABEL: 'test'
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %matins.2.2 = shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> <i32 0, i32 3, i32 6, i32 1, i32 4, i32 7, i32 2, i32 5, i32 8>
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX-LABEL: 'test'
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %matins.2.2 = shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> <i32 0, i32 3, i32 6, i32 1, i32 4, i32 7, i32 2, i32 5, i32 8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
entry:
%matins.2.2 = shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> <i32 0, i32 3, i32 6, i32 1, i32 4, i32 7, i32 2, i32 5, i32 8>
ret void
}
+define <12 x i64> @foo(<12 x i64> noundef %src) {
+; SSE-LABEL: 'foo'
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuffle = shufflevector <12 x i64> %src, <12 x i64> poison, <12 x i32> <i32 0, i32 3, i32 6, i32 9, i32 1, i32 4, i32 7, i32 10, i32 2, i32 5, i32 8, i32 11>
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <12 x i64> %shuffle
+;
+; AVX-LABEL: 'foo'
+; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shuffle = shufflevector <12 x i64> %src, <12 x i64> poison, <12 x i32> <i32 0, i32 3, i32 6, i32 9, i32 1, i32 4, i32 7, i32 10, i32 2, i32 5, i32 8, i32 11>
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <12 x i64> %shuffle
+;
+entry:
+ %shuffle = shufflevector <12 x i64> %src, <12 x i64> poison, <12 x i32> <i32 0, i32 3, i32 6, i32 9, i32 1, i32 4, i32 7, i32 10, i32 2, i32 5, i32 8, i32 11>
+ ret <12 x i64> %shuffle
+}
More information about the llvm-commits
mailing list