[llvm] r224119 - This patch recognizes (+ (+ v0, v1) (+ v2, v3)), reorders them for bundling into vector of loads,
Michael Zolotukhin
mzolotukhin at apple.com
Fri Dec 19 15:45:32 PST 2014
On Dec 12, 2014, at 4:53 AM, Suyog Sarda <suyog.sarda at samsung.com> wrote:
>
> Author: suyog
> Date: Fri Dec 12 06:53:44 2014
> New Revision: 224119
>
> URL: http://llvm.org/viewvc/llvm-project?rev=224119&view=rev
> Log:
> This patch recognizes (+ (+ v0, v1) (+ v2, v3)), reorders them for bundling into vector of loads,
> and vectorizes it.
>
> Test case :
>
> float hadd(float* a) {
> return (a[0] + a[1]) + (a[2] + a[3]);
> }
>
>
> AArch64 assembly before patch :
>
> ldp s0, s1, [x0]
> ldp s2, s3, [x0, #8]
> fadd s0, s0, s1
> fadd s1, s2, s3
> fadd s0, s0, s1
> ret
>
> AArch64 assembly after patch :
>
> ldp d0, d1, [x0]
> fadd v0.2s, v0.2s, v1.2s
> faddp s0, v0.2s
> ret
>
> Reviewed Link : http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20141208/248531.html
>
>
> Added:
> llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll
> Modified:
> llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=224119&r1=224118&r2=224119&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Fri Dec 12 06:53:44 2014
> @@ -439,6 +439,13 @@ public:
> /// \returns true if the memory operations A and B are consecutive.
> bool isConsecutiveAccess(Value *A, Value *B);
>
> + /// For consecutive loads (+(+ v0, v1)(+ v2, v3)), Left had v0 and v2
> + /// while Right had v1 and v3, which prevented bundling them into
> + /// a vector of loads. Reorder them so that Left now has v0 and v1
> + /// while Right has v2 and v3 enabling their bundling into a vector.
> + void reorderIfConsecutiveLoads(SmallVectorImpl<Value *> &Left,
> + SmallVectorImpl<Value *> &Right);
> +
> /// \brief Perform LICM and CSE on the newly generated gather sequences.
> void optimizeGatherSequence();
>
> @@ -1234,6 +1241,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
> if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
> ValueList Left, Right;
> reorderInputsAccordingToOpcode(VL, Left, Right);
> + reorderIfConsecutiveLoads (Left, Right);
> buildTree_rec(Left, Depth + 1);
> buildTree_rec(Right, Depth + 1);
> return;
> @@ -1818,6 +1826,19 @@ bool BoUpSLP::isConsecutiveAccess(Value
> return X == PtrSCEVB;
> }
>
> +void BoUpSLP::reorderIfConsecutiveLoads(SmallVectorImpl<Value *> &Left,
> + SmallVectorImpl<Value *> &Right) {
> + for (unsigned i = 0, e = Left.size(); i < e - 1; ++i) {
> + if (!isa<LoadInst>(Left[i]) || !isa<LoadInst>(Right[i]))
> + return;
> + if (!(isConsecutiveAccess(Left[i], Right[i]) &&
> + isConsecutiveAccess(Right[i], Left[i + 1])))
Sorry for the late question, but why do we need the isConsecutiveAccess(Right[i], Left[i + 1]) check here? I understand that if Left[i] and Right[i] are not consecutive, then there is no reason to swap them — we still won’t get consecutive loads. But I don’t understand what we are checking with the Right[i] vs. Left[i+1] comparison.
Thanks,
Michael
> + continue;
> + else
> + std::swap(Left[i + 1], Right[i]);
> + }
> +}
> +
> void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
> Instruction *VL0 = cast<Instruction>(VL[0]);
> BasicBlock::iterator NextInst = VL0;
> @@ -2048,9 +2069,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
> case Instruction::Or:
> case Instruction::Xor: {
> ValueList LHSVL, RHSVL;
> - if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
> + if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
> reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL);
> - else
> + reorderIfConsecutiveLoads(LHSVL, RHSVL);
> + } else
> for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
> LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
> RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
>
> Added: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll?rev=224119&view=auto
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll (added)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll Fri Dec 12 06:53:44 2014
> @@ -0,0 +1,27 @@
> +; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu -mcpu=cortex-a57 | FileCheck %s
> +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
> +target triple = "aarch64--linux-gnu"
> +
> +; float hadd (float *a) {
> +; return (a[0] + a[1]) + (a[2] + a[3]);
> +; }
> +
> +; CHECK-LABEL: @hadd
> +; CHECK: load <2 x float>*
> +; CHECK: fadd <2 x float>
> +; CHECK: extractelement <2 x float>
> +
> +define float @hadd(float* nocapture readonly %a) {
> +entry:
> + %0 = load float* %a, align 4
> + %arrayidx1 = getelementptr inbounds float* %a, i64 1
> + %1 = load float* %arrayidx1, align 4
> + %add = fadd float %0, %1
> + %arrayidx2 = getelementptr inbounds float* %a, i64 2
> + %2 = load float* %arrayidx2, align 4
> + %arrayidx3 = getelementptr inbounds float* %a, i64 3
> + %3 = load float* %arrayidx3, align 4
> + %add4 = fadd float %2, %3
> + %add5 = fadd float %add, %add4
> + ret float %add5
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list