[llvm] 5db5275 - [CostModel] Return an invalid cost for memory ops with unsupported types

Tue Jun 8 04:07:57 PDT 2021

Author: Kerry McLaughlin
Date: 2021-06-08T12:07:36+01:00
New Revision: 5db52751a594410d0166d606b305b01a03f0ca3f

URL: https://github.com/llvm/llvm-project/commit/5db52751a594410d0166d606b305b01a03f0ca3f
DIFF: https://github.com/llvm/llvm-project/commit/5db52751a594410d0166d606b305b01a03f0ca3f.diff

LOG: [CostModel] Return an invalid cost for memory ops with unsupported types

Fixes getTypeConversion to return `TypeScalarizeScalableVector` when a scalable vector
type cannot be legalized by widening/splitting. When this is the method of legalization
found, getTypeLegalizationCost will return an Invalid cost.

The getMemoryOpCost, getMaskedMemoryOpCost & getGatherScatterOpCost functions already call
getTypeLegalizationCost and will now also return an Invalid cost for unsupported types.

Reviewed By: sdesmalen, david-arm

Differential Revision: https://reviews.llvm.org/D102515

Added: 
    llvm/test/Analysis/CostModel/AArch64/sve-illegal-types.ll
    llvm/test/Transforms/VectorCombine/AArch64/extract-cmp-binop.ll

Modified: 
    llvm/lib/CodeGen/TargetLoweringBase.cpp
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
    llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
    llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index f4e3fad11084d..d2c291f2ae72b 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1016,8 +1016,8 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
     // If type is to be expanded, split the vector.
     //  <4 x i140> -> <2 x i140>
     if (LK.first == TypeExpandInteger) {
-      if (VT.getVectorElementCount() == ElementCount::getScalable(1))
-        report_fatal_error("Cannot legalize this scalable vector");
+      if (VT.getVectorElementCount().isScalable())
+        return LegalizeKind(TypeScalarizeScalableVector, EltVT);
       return LegalizeKind(TypeSplitVector,
                           VT.getHalfNumVectorElementsVT(Context));
     }
@@ -1080,7 +1080,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
   }
 
   if (VT.getVectorElementCount() == ElementCount::getScalable(1))
-    report_fatal_error("Cannot legalize this vector");
+    return LegalizeKind(TypeScalarizeScalableVector, EltVT);
 
   // Vectors with illegal element types are expanded.
   EVT NVT = EVT::getVectorVT(Context, EltVT,
@@ -1845,6 +1845,9 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
   while (true) {
     LegalizeKind LK = getTypeConversion(C, MTy);
 
+    if (LK.first == TypeScalarizeScalableVector)
+      return std::make_pair(InstructionCost::getInvalid(), MVT::getVT(Ty));
+
     if (LK.first == TypeLegal)
       return std::make_pair(Cost, MTy.getSimpleVT());
 

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 70d1d6a533366..7f02023322d80 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1288,6 +1288,8 @@ AArch64TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
     return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                         CostKind);
   auto LT = TLI->getTypeLegalizationCost(DL, Src);
+  if (!LT.first.isValid())
+    return InstructionCost::getInvalid();
   return LT.first * 2;
 }
 
@@ -1300,6 +1302,9 @@ InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
                                          Alignment, CostKind, I);
   auto *VT = cast<VectorType>(DataTy);
   auto LT = TLI->getTypeLegalizationCost(DL, DataTy);
+  if (!LT.first.isValid())
+    return InstructionCost::getInvalid();
+
   ElementCount LegalVF = LT.second.getVectorElementCount();
   Optional<unsigned> MaxNumVScale = getMaxVScale();
   assert(MaxNumVScale && "Expected valid max vscale value");
@@ -1326,6 +1331,8 @@ InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
                                   CostKind);
 
   auto LT = TLI->getTypeLegalizationCost(DL, Ty);
+  if (!LT.first.isValid())
+    return InstructionCost::getInvalid();
 
   // TODO: consider latency as well for TCK_SizeAndLatency.
   if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)

diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-illegal-types.ll b/llvm/test/Analysis/CostModel/AArch64/sve-illegal-types.ll
new file mode 100644
index 0000000000000..eeb569dbb285c
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-illegal-types.ll
@@ -0,0 +1,40 @@
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve  < %s | FileCheck %s
+
+define void @load_store(<vscale x 1 x i128>* %ptrs) {
+; CHECK-LABEL: 'load_store'
+; CHECK-NEXT: Invalid cost for instruction: %load1 = load <vscale x 1 x i128>, <vscale x 1 x i128>* undef
+; CHECK-NEXT: Invalid cost for instruction: %load2 = load <vscale x 2 x i128>, <vscale x 2 x i128>* undef
+; CHECK-NEXT: Invalid cost for instruction: %load3 = load <vscale x 1 x fp128>, <vscale x 1 x fp128>* undef
+; CHECK-NEXT: Invalid cost for instruction: %load4 = load <vscale x 2 x fp128>, <vscale x 2 x fp128>* undef
+; CHECK-NEXT: Invalid cost for instruction: store <vscale x 1 x i128> %load1, <vscale x 1 x i128>* %ptrs
+  %load1 = load <vscale x 1 x i128>, <vscale x 1 x i128>* undef
+  %load2 = load <vscale x 2 x i128>, <vscale x 2 x i128>* undef
+  %load3 = load <vscale x 1 x fp128>, <vscale x 1 x fp128>* undef
+  %load4 = load <vscale x 2 x fp128>, <vscale x 2 x fp128>* undef
+  store <vscale x 1 x i128> %load1, <vscale x 1 x i128>* %ptrs
+  ret void
+}
+
+define void @masked_load_store(<vscale x 1 x i128>* %ptrs, <vscale x 1 x i128>* %val, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru) {
+; CHECK-LABEL: 'masked_load_store'
+; CHECK-NEXT: Invalid cost for instruction: %mload = call <vscale x 1 x i128> @llvm.masked.load.nxv1i128.p0nxv1i128(<vscale x 1 x i128>* %val, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru)
+; CHECK-NEXT: Invalid cost for instruction: call void @llvm.masked.store.nxv1i128.p0nxv1i128(<vscale x 1 x i128> %mload, <vscale x 1 x i128>* %ptrs, i32 8, <vscale x 1 x i1> %mask)
+  %mload = call <vscale x 1 x i128> @llvm.masked.load.nxv1i128(<vscale x 1 x i128>* %val, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru)
+  call void @llvm.masked.store.nxv1i128(<vscale x 1 x i128> %mload, <vscale x 1 x i128>* %ptrs, i32 8, <vscale x 1 x i1> %mask)
+  ret void
+}
+
+define void @masked_gather_scatter(<vscale x 1 x i128*> %ptrs, <vscale x 1 x i128*> %val, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru) {
+; CHECK-LABEL: 'masked_gather_scatter'
+; CHECK-NEXT: Invalid cost for instruction: %mgather = call <vscale x 1 x i128> @llvm.masked.gather.nxv1i128.nxv1p0i128(<vscale x 1 x i128*> %val, i32 0, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru)
+; CHECK-NEXT: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i128.nxv1p0i128(<vscale x 1 x i128> %mgather, <vscale x 1 x i128*> %ptrs, i32 0, <vscale x 1 x i1> %mask)
+  %mgather = call <vscale x 1 x i128> @llvm.masked.gather.nxv1i128(<vscale x 1 x i128*> %val, i32 0, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru)
+  call void @llvm.masked.scatter.nxv1i128(<vscale x 1 x i128> %mgather, <vscale x 1 x i128*> %ptrs, i32 0, <vscale x 1 x i1> %mask)
+  ret void
+}
+
+declare <vscale x 1 x i128> @llvm.masked.load.nxv1i128(<vscale x 1 x i128>*, i32, <vscale x 1 x i1>, <vscale x 1 x i128>)
+declare <vscale x 1 x i128> @llvm.masked.gather.nxv1i128(<vscale x 1 x i128*>, i32, <vscale x 1 x i1>, <vscale x 1 x i128>)
+
+declare void @llvm.masked.store.nxv1i128(<vscale x 1 x i128>, <vscale x 1 x i128>*, i32, <vscale x 1 x i1>)
+declare void @llvm.masked.scatter.nxv1i128(<vscale x 1 x i128>, <vscale x 1 x i128*>, i32, <vscale x 1 x i1>)

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
index a29cf1823f6a1..2e600d461e899 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
@@ -1,8 +1,8 @@
 ; REQUIRES: asserts
 ; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck %s
 ; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck --check-prefix=CHECK-DBG %s
-; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SVE %s
-; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -force-target-supports-scalable-vectors=true -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck --check-prefix=CHECK-NO-MAX-VSCALE %s
+; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>%t | FileCheck --check-prefix=CHECK-NO-SVE %s
+; RUN: cat %t | FileCheck %s -check-prefix=CHECK-NO-SVE-REMARKS
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
@@ -309,11 +309,12 @@ exit:
 !16 = !{!"llvm.loop.vectorize.width", i32 16}
 !17 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
 
-; CHECK-NO-SVE-LABEL: LV: Checking a loop in "test_no_sve"
-; CHECK-NO-SVE: LV: Disabling scalable vectorization, because target does not support scalable vectors.
-; CHECK-NO-SVE: remark: <unknown>:0:0: Disabling scalable vectorization, because target does not support scalable vectors.
-; CHECK-NO-SVE: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF.
-; CHECK-NO-SVE: LV: Selecting VF: 4.
+; CHECK-NO-SVE-REMARKS-LABEL: LV: Checking a loop in "test_no_sve"
+; CHECK-NO-SVE-REMARKS: LV: Disabling scalable vectorization, because target does not support scalable vectors.
+; CHECK-NO-SVE-REMARKS: remark: <unknown>:0:0: Disabling scalable vectorization, because target does not support scalable vectors.
+; CHECK-NO-SVE-REMARKS: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF.
+; CHECK-NO-SVE-REMARKS: LV: Selecting VF: 4.
+; CHECK-NO-SVE-LABEL: @test_no_sve
 ; CHECK-NO-SVE: <4 x i32>
 ; CHECK-NO-SVE-NOT: <vscale x 4 x i32>
 define void @test_no_sve(i32* %a, i32* %b) {
@@ -343,11 +344,12 @@ exit:
 ; Test the LV falls back to fixed-width vectorization if scalable vectors are
 ; supported but max vscale is undefined.
 ;
-; CHECK-NO-MAX-VSCALE-LABEL: LV: Checking a loop in "test_no_max_vscale"
-; CEHCK-NO-MAX-VSCALE: The max safe fixed VF is: 4.
-; CHECK-NO-MAX-VSCALE: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF.
-; CHECK-NO-MAX-VSCALE: LV: Selecting VF: 4.
-; CHECK-NO-MAX-VSCALE: <4 x i32>
+; CHECK-NO-SVE-REMARKS-LABEL: LV: Checking a loop in "test_no_max_vscale"
+; CHECK-NO-SVE-REMARKS: The max safe fixed VF is: 4.
+; CHECK-NO-SVE-REMARKS: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF.
+; CHECK-NO-SVE-REMARKS: LV: Selecting VF: 4.
+; CHECK-NO-SVE-LABEL: @test_no_max_vscale
+; CHECK-NO-SVE: <4 x i32>
 define void @test_no_max_vscale(i32* %a, i32* %b) {
 entry:
   br label %loop

diff --git a/llvm/test/Transforms/VectorCombine/AArch64/extract-cmp-binop.ll b/llvm/test/Transforms/VectorCombine/AArch64/extract-cmp-binop.ll
new file mode 100644
index 0000000000000..bf61b3b2b730b
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/AArch64/extract-cmp-binop.ll
@@ -0,0 +1,21 @@
+; RUN: opt -vector-combine -S %s | FileCheck %s
+
+; Negative test for extract + cmp + binop - don't try this with scalable vectors.
+; Moved from X86/extract-cmp-binop.ll
+
+define i1 @scalable(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: @scalable(
+; CHECK-NEXT:    [[E1:%.*]] = extractelement <vscale x 4 x i32> [[A:%.*]], i32 3
+; CHECK-NEXT:    [[E2:%.*]] = extractelement <vscale x 4 x i32> [[A]], i32 1
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[E1]], 42
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[E2]], -8
+; CHECK-NEXT:    [[R:%.*]] = xor i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %e1 = extractelement <vscale x 4 x i32> %a, i32 3
+  %e2 = extractelement <vscale x 4 x i32> %a, i32 1
+  %cmp1 = icmp sgt i32 %e1, 42
+  %cmp2 = icmp sgt i32 %e2, -8
+  %r = xor i1 %cmp1, %cmp2
+  ret i1 %r
+}

diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
index 1a0d77e547f2f..73e52c13a465f 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
@@ -148,22 +148,3 @@ define i1 @different_source_vec(<4 x i32> %a, <4 x i32> %b) {
   %r = and i1 %cmp1, %cmp2
   ret i1 %r
 }
-
-; Negative test - don't try this with scalable vectors.
-
-define i1 @scalable(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: @scalable(
-; CHECK-NEXT:    [[E1:%.*]] = extractelement <vscale x 4 x i32> [[A:%.*]], i32 3
-; CHECK-NEXT:    [[E2:%.*]] = extractelement <vscale x 4 x i32> [[A]], i32 1
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[E1]], 42
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[E2]], -8
-; CHECK-NEXT:    [[R:%.*]] = xor i1 [[CMP1]], [[CMP2]]
-; CHECK-NEXT:    ret i1 [[R]]
-;
-  %e1 = extractelement <vscale x 4 x i32> %a, i32 3
-  %e2 = extractelement <vscale x 4 x i32> %a, i32 1
-  %cmp1 = icmp sgt i32 %e1, 42
-  %cmp2 = icmp sgt i32 %e2, -8
-  %r = xor i1 %cmp1, %cmp2
-  ret i1 %r
-}

diff --git a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
index 0024d83687389..03f1fdcf37d1c 100644
--- a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
+++ b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
@@ -573,9 +573,11 @@ TEST_F(AArch64SelectionDAGTest, getTypeConversion_WidenScalableEVT) {
   EXPECT_EQ(getTypeToTransformTo(FromVT), ToVT);
 }
 
-TEST_F(AArch64SelectionDAGTest, getTypeConversion_NoScalarizeEVT_nxv1f128) {
-  EVT FromVT = EVT::getVectorVT(Context, MVT::f128, 1, true);
-  EXPECT_DEATH(getTypeAction(FromVT), "Cannot legalize this vector");
+TEST_F(AArch64SelectionDAGTest,
+       getTypeConversion_ScalarizeScalableEVT_nxv1f128) {
+  EVT VT = EVT::getVectorVT(Context, MVT::f128, ElementCount::getScalable(1));
+  EXPECT_EQ(getTypeAction(VT), TargetLoweringBase::TypeScalarizeScalableVector);
+  EXPECT_EQ(getTypeToTransformTo(VT), MVT::f128);
 }
 
 TEST_F(AArch64SelectionDAGTest, TestFold_STEP_VECTOR) {