[llvm] r246824 - [PowerPC] Enable interleaved-access vectorization
Hal Finkel via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 3 17:10:41 PDT 2015
Author: hfinkel
Date: Thu Sep 3 19:10:41 2015
New Revision: 246824
URL: http://llvm.org/viewvc/llvm-project?rev=246824&view=rev
Log:
[PowerPC] Enable interleaved-access vectorization
This adds a basic cost model for interleaved-access vectorization (and a better
default for shuffles), and enables interleaved-access vectorization by default.
The relevant difference from the default cost model for interleaved-access
vectorization, is that on PPC, the shuffles that end up being used are *much*
cheaper than modeling the process with insert/extract pairs (which are
quite expensive, especially on older cores).
Added:
llvm/trunk/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
Modified:
llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.h
Modified: llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp?rev=246824&r1=246823&r2=246824&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp Thu Sep 3 19:10:41 2015
@@ -207,6 +207,10 @@ bool PPCTTIImpl::enableAggressiveInterle
return LoopHasReductions;
}
+bool PPCTTIImpl::enableInterleavedAccessVectorization() {
+ return true;
+}
+
unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
if (Vector && !ST->hasAltivec() && !ST->hasQPX())
return 0;
@@ -266,7 +270,15 @@ int PPCTTIImpl::getArithmeticInstrCost(
int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ // Legalize the type.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+ // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
+ // (at least in the sense that there need only be one non-loop-invariant
+ // instruction). We need one such shuffle instruction for each actual
+ // register (this is not true for arbitrary shuffles, but is true for the
+ // structured types of shuffles covered by TTI::ShuffleKind).
+ return LT.first;
}
int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
@@ -374,4 +386,28 @@ int PPCTTIImpl::getMemoryOpCost(unsigned
return Cost;
}
+
+int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
+ assert(isa<VectorType>(VecTy) &&
+ "Expect a vector type for interleaved memory op");
+
+ // Legalize the type.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
+
+ // Firstly, the cost of load/store operation.
+ int Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);
+
+ // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
+ // (at least in the sense that there need only be one non-loop-invariant
+ // instruction). For each result vector, we need one shuffle per incoming
+ // vector (except that the first shuffle can take two incoming vectors
+ // because it does not need to take itself).
+ Cost += Factor*(LT.first-1);
+
+ return Cost;
+}
Modified: llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.h?rev=246824&r1=246823&r2=246824&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.h Thu Sep 3 19:10:41 2015
@@ -67,6 +67,7 @@ public:
/// @{
bool enableAggressiveInterleaving(bool LoopHasReductions);
+ bool enableInterleavedAccessVectorization();
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
unsigned getMaxInterleaveFactor(unsigned VF);
@@ -82,6 +83,11 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);
+ int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace);
/// @}
};
Added: llvm/trunk/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll?rev=246824&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll Thu Sep 3 19:10:41 2015
@@ -0,0 +1,30 @@
+; RUN: opt -S -basicaa -loop-vectorize < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 {
+entry:
+ br label %for.body
+
+; CHECK-LABEL: @foo
+; CHECK: <2 x double>
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = shl nsw i64 %indvars.iv, 1
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %0
+ %1 = load double, double* %arrayidx, align 8
+ %add = fadd double %1, 1.000000e+00
+ %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
+ store double %add, double* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+attributes #0 = { nounwind "target-cpu"="pwr8" }
+
More information about the llvm-commits
mailing list