[llvm-branch-commits] [llvm] 60e4698 - [CostModel]Replace FixedVectorType by VectorType in costgetIntrinsicInstrCost

Wed Dec 16 05:12:53 PST 2020

Author: Caroline Concatto
Date: 2020-12-16T13:06:23Z
New Revision: 60e4698b9aba8d9a2b27ac8a636c95ad1f7d94e0

URL: https://github.com/llvm/llvm-project/commit/60e4698b9aba8d9a2b27ac8a636c95ad1f7d94e0
DIFF: https://github.com/llvm/llvm-project/commit/60e4698b9aba8d9a2b27ac8a636c95ad1f7d94e0.diff

LOG: [CostModel]Replace FixedVectorType by VectorType in costgetIntrinsicInstrCost

This patch replaces FixedVectorType with VectorType in getIntrinsicInstrCost
in BasicTTIImpl.h. It moves the early return for scalable vector types later
in the function, so that it applies only to the intrinsics that do not yet
handle them, and adds tests for scalable types.

Depends on D91532

Differential Revision: https://reviews.llvm.org/D92094

Added: 
    llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll

Modified: 
    llvm/include/llvm/CodeGen/BasicTTIImpl.h

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 05c5c835d74a..7dca7cd291c9 100644

--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1202,14 +1202,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     if (ICA.isTypeBasedOnly())
       return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
 
-    // TODO: Handle scalable vectors?
     Type *RetTy = ICA.getReturnType();
-    if (isa<ScalableVectorType>(RetTy))
-      return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 
     ElementCount VF = ICA.getVectorFactor();
     ElementCount RetVF =
-        (RetTy->isVectorTy() ? cast<FixedVectorType>(RetTy)->getElementCount()
+        (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
                              : ElementCount::getFixed(1));
     assert((RetVF.isScalar() || VF.isScalar()) &&
            "VF > 1 and RetVF is a vector type");
@@ -1238,6 +1235,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       return thisT()->getMemcpyCost(ICA.getInst());
 
     case Intrinsic::masked_scatter: {
+      if (isa<ScalableVectorType>(RetTy))
+        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
       assert(VF.isScalar() && "Can't vectorize types here.");
       const Value *Mask = Args[3];
       bool VarMask = !isa<Constant>(Mask);
@@ -1247,6 +1246,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
                                              VarMask, Alignment, CostKind, I);
     }
     case Intrinsic::masked_gather: {
+      if (isa<ScalableVectorType>(RetTy))
+        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
       assert(VF.isScalar() && "Can't vectorize types here.");
       const Value *Mask = Args[2];
       bool VarMask = !isa<Constant>(Mask);
@@ -1265,17 +1266,23 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::vector_reduce_fmin:
     case Intrinsic::vector_reduce_umax:
     case Intrinsic::vector_reduce_umin: {
+      if (isa<ScalableVectorType>(RetTy))
+        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
       IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I);
       return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
     }
     case Intrinsic::vector_reduce_fadd:
     case Intrinsic::vector_reduce_fmul: {
+      if (isa<ScalableVectorType>(RetTy))
+        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
       IntrinsicCostAttributes Attrs(
           IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, 1, I);
       return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
     }
     case Intrinsic::fshl:
     case Intrinsic::fshr: {
+      if (isa<ScalableVectorType>(RetTy))
+        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
       const Value *X = Args[0];
       const Value *Y = Args[1];
       const Value *Z = Args[2];
@@ -1316,6 +1323,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       return Cost;
     }
     }
+    // TODO: Handle the remaining intrinsics with scalable vector types
+    if (isa<ScalableVectorType>(RetTy))
+      return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 
     // Assume that we need to scalarize this intrinsic.
     SmallVector<Type *, 4> Types;

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll
new file mode 100644
index 000000000000..484aa2a01130
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll
@@ -0,0 +1,33 @@
+; Checks getIntrinsicInstrCost in BasicTTIImpl.h with SVE for CTLZ and CTTZ
+
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve  < %s 2>%t | FileCheck %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; Check for CTLZ
+
+define void  @ctlz_nxv4i32(<vscale x 4 x i32> %A) {
+; CHECK-LABEL: 'ctlz_nxv4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %1 = tail call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret void
+
+  %1 = tail call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
+  ret void
+}
+
+; Check for CTTZ
+
+define void  @cttz_nxv4i32(<vscale x 4 x i32> %A) {
+; CHECK-LABEL: 'cttz_nxv4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %1 = tail call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret void
+
+  %1 = tail call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
+  ret void
+}
+
+declare <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32>, i1)
+declare <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32>, i1)