[llvm] d79ee3a - [ARM] Add a very basic active_lane_mask cost

Sat Oct 17 02:09:56 PDT 2020

Author: David Green
Date: 2020-10-17T10:09:42+01:00
New Revision: d79ee3a807115f8b96f9f8aa13d99cdea1652980

URL: https://github.com/llvm/llvm-project/commit/d79ee3a807115f8b96f9f8aa13d99cdea1652980
DIFF: https://github.com/llvm/llvm-project/commit/d79ee3a807115f8b96f9f8aa13d99cdea1652980.diff

LOG: [ARM] Add a very basic active_lane_mask cost

This adds a very basic cost for active_lane_mask under MVE - making the
assumption that they will be free and then apologizing for that in a
comment.

In reality they may either be free (by being nicely folded into a tail
predicated loop), cost the same as a VCTP or be expanded into vdup's,
adds and cmp's. It is difficult to detect the difference from a single
getIntrinsicInstrCost call, so makes the assumption that the vectorizer
is adding them, and only added them where it makes sense.

We may need to change this in the future to better model predicate costs
in the vectorizer, especially at -Os or non-tail predicated loops. The
vectorizer currently does not query the cost of these instructions but
that will change in the future and a zero cost there probably makes the
most sense at the moment.

Differential Revision: https://reviews.llvm.org/D88989

Added: 
    llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll

Modified: 
    llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
    llvm/lib/Target/ARM/ARMTargetTransformInfo.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 82156af27776..1cb9e7283007 100644

--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1409,6 +1409,21 @@ unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
   return ScalarCost;
 }
 
+int ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                      TTI::TargetCostKind CostKind) {
+  // Currently we make a somewhat optimistic assumption that active_lane_mask's
+  // are always free. In reality it may be freely folded into a tail predicated
+  // loop, expanded into a VCPT or expanded into a lot of add/icmp code. We
+  // may need to improve this in the future, but being able to detect if it
+  // is free or not involves looking at a lot of other code. We currently assume
+  // that the vectorizer inserted these, and knew what it was doing in adding
+  // one.
+  if (ST->hasMVEIntegerOps() && ICA.getID() == Intrinsic::get_active_lane_mask)
+    return 0;
+
+  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+}
+
 bool ARMTTIImpl::isLoweredToCall(const Function *F) {
   if (!F->isIntrinsic())
     BaseT::isLoweredToCall(F);

diff  --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 689b484ad976..ee80be5d7b48 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -247,6 +247,9 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
                                   Align Alignment, TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);
 
+  int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                            TTI::TargetCostKind CostKind);
+
   bool maybeLoweredToCall(Instruction &I);
   bool isLoweredToCall(const Function *F);
   bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,

diff  --git a/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll b/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll
new file mode 100644
index 000000000000..ae23b3e3c449
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -cost-model -analyze | FileCheck %s
+
+; Note that these instructions like this (not in a look that could be tail
+; predicated) should not really be free. We currently assume that all active
+; lane masks are free.
+
+define void @v4i32(i32 %index, i32 %TC) {
+; CHECK-LABEL: 'v4i32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
+  ret void
+}
+
+define void @v8i16(i32 %index, i32 %TC) {
+; CHECK-LABEL: 'v8i16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
+  ret void
+}
+
+define void @v16i8(i32 %index, i32 %TC) {
+; CHECK-LABEL: 'v16i8'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
+  ret void
+}
+
+declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
+declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
+declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)