[llvm] 670dd40 - [Analysis] Fix getNumberOfParts to return 0 when the answer is unknown

Wed Nov 17 04:07:17 PST 2021

Author: David Sherwood
Date: 2021-11-17T12:07:09Z
New Revision: 670dd402441fdda4779fd59604a8b9b526efd7e6

URL: https://github.com/llvm/llvm-project/commit/670dd402441fdda4779fd59604a8b9b526efd7e6
DIFF: https://github.com/llvm/llvm-project/commit/670dd402441fdda4779fd59604a8b9b526efd7e6.diff

LOG: [Analysis] Fix getNumberOfParts to return 0 when the answer is unknown

When asking how many parts are required for a scalable vector type
there are occasions when it cannot be computed. For example, <vscale x 1 x i3>
is one such vector for AArch64+SVE because at the moment no matter how we
promote the i3 type we never end up with a legal vector. This means
that getTypeConversion returns TypeScalarizeScalableVector as the
LegalizeKind, and then getTypeLegalizationCost returns an invalid cost.
This then causes BasicTTImpl::getNumberOfParts to dereference an invalid
cost, which triggers an assert. This patch changes getNumberOfParts to
return 0 for such cases, since the definition of getNumberOfParts in
TargetTransformInfo.h states that we can use a return value of 0 to represent
an unknown answer.

Currently, LoopVectorize.cpp is the only place where we need to check for
0 as a return value, because all other instances will not currently
ask for the number of parts for <vscale x 1 x iX> types.

In addition, I have changed the target-independent interface for
getNumberOfParts to return 1 and assume there is a single register
that can fit the type. The loop vectoriser has lots of tests that are
target-independent and they relied upon the 0 value to mean the
answer is known and that we are not scalarising the vector.

I have added tests here that show we correctly return an invalid cost
for VF=vscale x 1 when the loop contains unusual types such as i7:

  Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll

Differential Revision: https://reviews.llvm.org/D113772

Added: 
    llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll

Modified: 
    llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/include/llvm/CodeGen/BasicTTIImpl.h
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index bbcfd9a45eb3b..05ef2495475fc 100644

--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -632,7 +632,8 @@ class TargetTransformInfoImplBase {
     return 1;
   }
 
-  unsigned getNumberOfParts(Type *Tp) const { return 0; }
+  // Assume that we have a register of the right size for the type.
+  unsigned getNumberOfParts(Type *Tp) const { return 1; }
 
   InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                             const SCEV *) const {

diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index c265a22f0e6eb..324b7dcfb3aca 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2026,7 +2026,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
   unsigned getNumberOfParts(Type *Tp) {
     std::pair<InstructionCost, MVT> LT =
         getTLI()->getTypeLegalizationCost(DL, Tp);
-    return *LT.first.getValue();
+    return LT.first.isValid() ? *LT.first.getValue() : 0;
   }
 
   InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *,

diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d6134c764bd24..07d0001bc045d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7428,9 +7428,14 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   Type *VectorTy;
   InstructionCost C = getInstructionCost(I, VF, VectorTy);
 
-  bool TypeNotScalarized =
-      VF.isVector() && VectorTy->isVectorTy() &&
-      TTI.getNumberOfParts(VectorTy) < VF.getKnownMinValue();
+  bool TypeNotScalarized = false;
+  if (VF.isVector() && VectorTy->isVectorTy()) {
+    unsigned NumParts = TTI.getNumberOfParts(VectorTy);
+    if (NumParts)
+      TypeNotScalarized = NumParts < VF.getKnownMinValue();
+    else
+      C = InstructionCost::getInvalid();
+  }
   return VectorizationCostTy(C, TypeNotScalarized);
 }
 

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
new file mode 100644
index 0000000000000..712bf77894aa8
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
@@ -0,0 +1,51 @@
+; REQUIRES: asserts
+; RUN: opt -scalable-vectorization=on -loop-vectorize -S < %s -debug 2>%t | FileCheck %s
+; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; DEBUG: Found an estimated cost of Invalid for VF vscale x 1 For instruction:   %indvars.iv1294 = phi i7 [ %indvars.iv.next1295, %for.body ], [ 0, %entry ]
+; DEBUG: Found an estimated cost of Invalid for VF vscale x 1 For instruction:   %addi7 = add i7 %indvars.iv1294, 0
+; DEBUG: Found an estimated cost of Invalid for VF vscale x 1 For instruction:   %indvars.iv.next1295 = add i7 %indvars.iv1294, 1
+
+define void @induction_i7(i64* %dst) #0 {
+; CHECK-LABEL: @induction_i7(
+; CHECK:       vector.ph:
+; CHECK:         [[TMP4:%.*]] = call <vscale x 2 x i8> @llvm.experimental.stepvector.nxv2i8()
+; CHECK:         [[TMP5:%.*]] = trunc <vscale x 2 x i8> %4 to <vscale x 2 x i7>
+; CHECK-NEXT:    [[TMP6:%.*]] = add <vscale x 2 x i7> [[TMP5]], zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = mul <vscale x 2 x i7> [[TMP6]], shufflevector (<vscale x 2 x i7> insertelement (<vscale x 2 x i7> poison, i7 1, i32 0), <vscale x 2 x i7> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 2 x i7> zeroinitializer, [[TMP7]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 2 x i7> [ [[INDUCTION]], %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = add <vscale x 2 x i7> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[DST:%.*]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, i64* [[TMP12]], i32 0
+; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i64* [[TMP13]] to <vscale x 2 x i64>*
+; CHECK-NEXT:    store <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64>* [[TMP14]], align 8
+; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 2 x i7> [[VEC_IND]], 
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv1294 = phi i7 [ %indvars.iv.next1295, %for.body ], [ 0, %entry ]
+  %indvars.iv1286 = phi i64 [ %indvars.iv.next1287, %for.body ], [ 0, %entry ]
+  %addi7 = add i7 %indvars.iv1294, 0
+  %arrayidx = getelementptr inbounds i64, i64* %dst, i64 %indvars.iv1286
+  store i64 0, i64* %arrayidx, align 8
+  %indvars.iv.next1287 = add nuw nsw i64 %indvars.iv1286, 1
+  %indvars.iv.next1295 = add i7 %indvars.iv1294, 1
+  %exitcond = icmp eq i64 %indvars.iv.next1287, 64
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+attributes #0 = {"target-features"="+sve"}