[llvm] r300056 - [LoopVectorizer, TTI] New method supportsEfficientVectorElementLoadStore()

Wed Apr 12 05:41:37 PDT 2017

Author: jonpa
Date: Wed Apr 12 07:41:37 2017
New Revision: 300056

URL: http://llvm.org/viewvc/llvm-project?rev=300056&view=rev
Log:
[LoopVectorizer, TTI]  New method supportsEfficientVectorElementLoadStore()

Since SystemZ supports vector element load/store instructions, there is no
need for extracts/inserts if a vector load/store gets scalarized.

This patch lets Target specify that it supports such instructions by means of
a new TTI hook that defaults to false.

The use for this is in the LoopVectorizer getScalarizationOverhead() method,
which will with this patch produce a smaller sum for a vector load/store on
SystemZ.

New test: test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll

Review: Adam Nemet
https://reviews.llvm.org/D30680

Added:
    llvm/trunk/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll
Modified:
    llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
    llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
    llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp

Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=300056&r1=300055&r2=300056&view=diff
==============================================================================

--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Wed Apr 12 07:41:37 2017
@@ -437,6 +437,11 @@ public:
   unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                             unsigned VF) const;
 
+  /// If target has efficient vector element load/store instructions, it can
+  /// return true here so that insertion/extraction costs are not added to
+  /// the scalarization cost of a load/store.
+  bool supportsEfficientVectorElementLoadStore() const;
+
   /// \brief Don't restrict interleaved unrolling to small loops.
   bool enableAggressiveInterleaving(bool LoopHasReductions) const;
 
@@ -790,6 +795,7 @@ public:
   getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
   virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                                     unsigned VF) = 0;
+  virtual bool supportsEfficientVectorElementLoadStore() = 0;
   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
   virtual bool enableInterleavedAccessVectorization() = 0;
   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
@@ -996,6 +1002,10 @@ public:
     return Impl.getOperandsScalarizationOverhead(Args, VF);
   }
 
+  bool supportsEfficientVectorElementLoadStore() override {
+    return Impl.supportsEfficientVectorElementLoadStore();
+  }
+
   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
     return Impl.enableAggressiveInterleaving(LoopHasReductions);
   }

Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h?rev=300056&r1=300055&r2=300056&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h Wed Apr 12 07:41:37 2017
@@ -262,6 +262,8 @@ public:
   unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                             unsigned VF) { return 0; }
 
+  bool supportsEfficientVectorElementLoadStore() { return false; }
+
   bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
 
   bool enableInterleavedAccessVectorization() { return false; }

Modified: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetTransformInfo.cpp?rev=300056&r1=300055&r2=300056&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp Wed Apr 12 07:41:37 2017
@@ -197,6 +197,10 @@ getOperandsScalarizationOverhead(ArrayRe
   return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
 }
 
+bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
+  return TTIImpl->supportsEfficientVectorElementLoadStore();
+}
+
 bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
   return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
 }

Modified: llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h?rev=300056&r1=300055&r2=300056&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h Wed Apr 12 07:41:37 2017
@@ -55,6 +55,7 @@ public:
   unsigned getNumberOfRegisters(bool Vector);
   unsigned getRegisterBitWidth(bool Vector);
 
+  bool supportsEfficientVectorElementLoadStore() { return true; }
   bool enableInterleavedAccessVectorization() { return true; }
 
   int getArithmeticInstrCost(

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=300056&r1=300055&r2=300056&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Wed Apr 12 07:41:37 2017
@@ -3663,13 +3663,17 @@ static unsigned getScalarizationOverhead
 
   unsigned Cost = 0;
   Type *RetTy = ToVectorTy(I->getType(), VF);
-  if (!RetTy->isVoidTy())
+  if (!RetTy->isVoidTy() &&
+      (!isa<LoadInst>(I) ||
+       !TTI.supportsEfficientVectorElementLoadStore()))
     Cost += TTI.getScalarizationOverhead(RetTy, true, false);
 
   if (CallInst *CI = dyn_cast<CallInst>(I)) {
     SmallVector<const Value *, 4> Operands(CI->arg_operands());
     Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
-  } else {
+  }
+  else if (!isa<StoreInst>(I) ||
+           !TTI.supportsEfficientVectorElementLoadStore()) {
     SmallVector<const Value *, 4> Operands(I->operand_values());
     Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
   }

Added: llvm/trunk/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll?rev=300056&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll Wed Apr 12 07:41:37 2017
@@ -0,0 +1,33 @@
+; REQUIRES: asserts
+; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize \
+; RUN:   -force-vector-width=4 -debug-only=loop-vectorize \
+; RUN:   -disable-output -enable-interleaved-mem-accesses=false < %s 2>&1 | \
+; RUN:   FileCheck %s
+;
+; Check that a scalarized load/store does not get a cost for insterts/
+; extracts, since z13 supports element load/store.
+
+define void @fun(i32* %data, i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+  %tmp0 = getelementptr inbounds i32, i32* %data, i64 %i
+  %tmp1 = load i32, i32* %tmp0, align 4
+  %tmp2 = add i32 %tmp1, 1
+  store i32 %tmp2, i32* %tmp0, align 4
+  %i.next = add nuw nsw i64 %i, 2
+  %cond = icmp slt i64 %i.next, %n
+  br i1 %cond, label %for.body, label %for.end
+
+for.end:
+  ret void
+
+; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction:   %tmp1 = load i32, i32* %tmp0, align 4
+; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction:   store i32 %tmp2, i32* %tmp0, align 4
+
+; CHECK: LV: Scalarizing:  %tmp1 = load i32, i32* %tmp0, align 4
+; CHECK: LV: Scalarizing:  store i32 %tmp2, i32* %tmp0, align 4
+}
+