[llvm] 371412e - [COST]Fix crash for non-power-2 vector shuffle mask.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 29 07:29:43 PDT 2022


Author: Alexey Bataev
Date: 2022-04-29T07:28:07-07:00
New Revision: 371412e065a63107d5d79330da6757ff693d91cc

URL: https://github.com/llvm/llvm-project/commit/371412e065a63107d5d79330da6757ff693d91cc
DIFF: https://github.com/llvm/llvm-project/commit/371412e065a63107d5d79330da6757ff693d91cc.diff

LOG: [COST]Fix crash for non-power-2 vector shuffle mask.

Need to normalize the mask to avoid possible crashes during attempts
to estimate the cost of very long shuffles with a non-power-of-2 number
of elements in their masks.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 29e66ac90b66..d6291a1bb0ce 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1238,15 +1238,18 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
         // copy of the previous destination register (the cost is
         // TTI::TCC_Basic). If the source register is just reused, the cost for
         // this operation is 0.
-        unsigned NormalizedVF = LT.second.getVectorNumElements() * NumOfSrcs;
+        unsigned E = *NumOfDests.getValue();
+        unsigned NormalizedVF =
+            LegalVT.getVectorNumElements() * std::max(NumOfSrcs, E);
+        unsigned NumOfSrcRegs = NormalizedVF / LegalVT.getVectorNumElements();
+        unsigned NumOfDestRegs = NormalizedVF / LegalVT.getVectorNumElements();
         SmallVector<int> NormalizedMask(NormalizedVF, UndefMaskElem);
         copy(Mask, NormalizedMask.begin());
-        unsigned E = *NumOfDests.getValue();
         unsigned PrevSrcReg = 0;
         ArrayRef<int> PrevRegMask;
         InstructionCost Cost = 0;
         processShuffleMasks(
-            NormalizedMask, NumOfSrcs, E, E, []() {},
+            NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
             [this, SingleOpTy, &PrevSrcReg, &PrevRegMask,
              &Cost](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
               if (!ShuffleVectorInst::isIdentityMask(RegMask)) {

diff  --git a/llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2.ll b/llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2.ll
index df0d12a30652..365a678815f1 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2.ll
@@ -1,13 +1,31 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s -check-prefixes=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s -check-prefixes=AVX
 
 define void @test() {
-; CHECK-LABEL: 'test'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %matins.2.2 = shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> <i32 0, i32 3, i32 6, i32 1, i32 4, i32 7, i32 2, i32 5, i32 8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE-LABEL: 'test'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %matins.2.2 = shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> <i32 0, i32 3, i32 6, i32 1, i32 4, i32 7, i32 2, i32 5, i32 8>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX-LABEL: 'test'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %matins.2.2 = shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> <i32 0, i32 3, i32 6, i32 1, i32 4, i32 7, i32 2, i32 5, i32 8>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 entry:
   %matins.2.2 = shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> <i32 0, i32 3, i32 6, i32 1, i32 4, i32 7, i32 2, i32 5, i32 8>
   ret void
 }
 
+define <12 x i64> @foo(<12 x i64> noundef %src) {
+; SSE-LABEL: 'foo'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shuffle = shufflevector <12 x i64> %src, <12 x i64> poison, <12 x i32> <i32 0, i32 3, i32 6, i32 9, i32 1, i32 4, i32 7, i32 10, i32 2, i32 5, i32 8, i32 11>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <12 x i64> %shuffle
+;
+; AVX-LABEL: 'foo'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shuffle = shufflevector <12 x i64> %src, <12 x i64> poison, <12 x i32> <i32 0, i32 3, i32 6, i32 9, i32 1, i32 4, i32 7, i32 10, i32 2, i32 5, i32 8, i32 11>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <12 x i64> %shuffle
+;
+entry:
+  %shuffle = shufflevector <12 x i64> %src, <12 x i64> poison, <12 x i32> <i32 0, i32 3, i32 6, i32 9, i32 1, i32 4, i32 7, i32 10, i32 2, i32 5, i32 8, i32 11>
+  ret <12 x i64> %shuffle
+}


        


More information about the llvm-commits mailing list