[llvm] r224119 - This patch recognizes (+ (+ v0, v1) (+ v2, v3)), reorders them for bundling into vector of loads,

Chandler Carruth chandlerc at google.com
Wed Dec 17 00:59:24 PST 2014


I've sent further feedback to the review thread, but please revert this
patch. It violates very basic principles of floating-point arithmetic and
is triggering miscompiles.
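
Concretely, the faddp sequence in the "after patch" assembly below evaluates
(a[0] + a[2]) + (a[1] + a[3]) rather than the source's
(a[0] + a[1]) + (a[2] + a[3]), and IEEE float addition is not associative, so
the two groupings can round differently. A minimal standalone sketch of the
problem (the input values are chosen purely for illustration, not taken from a
reported miscompile):

    // Illustration only: regrouping IEEE float additions changes the
    // rounded result, so the reordering is observable without fast-math.
    #include <cstdio>

    int main() {
      float a[4] = {1.0e8f, 1.0f, -1.0e8f, 1.0f};
      float grouped   = (a[0] + a[1]) + (a[2] + a[3]); // each 1.0f is rounded away: 0.0f
      float regrouped = (a[0] + a[2]) + (a[1] + a[3]); // large terms cancel first: 2.0f
      std::printf("grouped = %g, regrouped = %g\n", grouped, regrouped);
      return 0;
    }

In the first grouping each 1.0f is lost when added to 1.0e8f, giving 0.0f; in
the regrouped form the large terms cancel before the small ones are added,
giving 2.0f.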

On Fri, Dec 12, 2014 at 4:53 AM, Suyog Sarda <suyog.sarda at samsung.com>
wrote:
>
> Author: suyog
> Date: Fri Dec 12 06:53:44 2014
> New Revision: 224119
>
> URL: http://llvm.org/viewvc/llvm-project?rev=224119&view=rev
> Log:
> This patch recognizes (+ (+ v0, v1) (+ v2, v3)), reorders the operands for
> bundling into a vector of loads, and vectorizes it.
>
>  Test case:
>
>        float hadd(float* a) {
>            return (a[0] + a[1]) + (a[2] + a[3]);
>         }
>
>
>  AArch64 assembly before patch:
>
>         ldp     s0, s1, [x0]
>         ldp     s2, s3, [x0, #8]
>         fadd    s0, s0, s1
>         fadd    s1, s2, s3
>         fadd    s0, s0, s1
>         ret
>
>  AArch64 assembly after patch:
>
>         ldp     d0, d1, [x0]
>         fadd    v0.2s, v0.2s, v1.2s
>         faddp   s0, v0.2s
>         ret
>
> Reviewed Link:
> http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20141208/248531.html
>
>
> Added:
>     llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll
> Modified:
>     llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=224119&r1=224118&r2=224119&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Fri Dec 12 06:53:44 2014
> @@ -439,6 +439,13 @@ public:
>    /// \returns true if the memory operations A and B are consecutive.
>    bool isConsecutiveAccess(Value *A, Value *B);
>
> +  /// For consecutive loads (+(+ v0, v1)(+ v2, v3)), Left had v0 and v2
> +  /// while Right had v1 and v3, which prevented bundling them into
> +  /// a vector of loads. Reorder them so that Left now has v0 and v1
> +  /// while Right has v2 and v3, enabling their bundling into a vector.
> +  void reorderIfConsecutiveLoads(SmallVectorImpl<Value *> &Left,
> +                                 SmallVectorImpl<Value *> &Right);
> +
>    /// \brief Perform LICM and CSE on the newly generated gather sequences.
>    void optimizeGatherSequence();
>
> @@ -1234,6 +1241,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>        if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
>          ValueList Left, Right;
>          reorderInputsAccordingToOpcode(VL, Left, Right);
> +        reorderIfConsecutiveLoads(Left, Right);
>          buildTree_rec(Left, Depth + 1);
>          buildTree_rec(Right, Depth + 1);
>          return;
> @@ -1818,6 +1826,19 @@ bool BoUpSLP::isConsecutiveAccess(Value
>    return X == PtrSCEVB;
>  }
>
> +void BoUpSLP::reorderIfConsecutiveLoads(SmallVectorImpl<Value *> &Left,
> +                                        SmallVectorImpl<Value *> &Right) {
> +  for (unsigned i = 0, e = Left.size(); i < e - 1; ++i) {
> +    if (!isa<LoadInst>(Left[i]) || !isa<LoadInst>(Right[i]))
> +      return;
> +    if (!(isConsecutiveAccess(Left[i], Right[i]) &&
> +          isConsecutiveAccess(Right[i], Left[i + 1])))
> +      continue;
> +    else
> +      std::swap(Left[i + 1], Right[i]);
> +  }
> +}
> +
>  void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
>    Instruction *VL0 = cast<Instruction>(VL[0]);
>    BasicBlock::iterator NextInst = VL0;
> @@ -2048,9 +2069,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>      case Instruction::Or:
>      case Instruction::Xor: {
>        ValueList LHSVL, RHSVL;
> -      if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
> +      if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
>          reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL);
> -      else
> +        reorderIfConsecutiveLoads(LHSVL, RHSVL);
> +      } else
>          for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
>            LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
>            RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
>
> Added: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll?rev=224119&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll (added)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll Fri Dec 12 06:53:44 2014
> @@ -0,0 +1,27 @@
> +; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu -mcpu=cortex-a57 | FileCheck %s
> +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
> +target triple = "aarch64--linux-gnu"
> +
> +; float hadd (float *a) {
> +;   return (a[0] + a[1]) + (a[2] + a[3]);
> +; }
> +
> +; CHECK-LABEL: @hadd
> +; CHECK: load <2 x float>*
> +; CHECK: fadd <2 x float>
> +; CHECK: extractelement <2 x float>
> +
> +define float @hadd(float* nocapture readonly %a) {
> +entry:
> +  %0 = load float* %a, align 4
> +  %arrayidx1 = getelementptr inbounds float* %a, i64 1
> +  %1 = load float* %arrayidx1, align 4
> +  %add = fadd float %0, %1
> +  %arrayidx2 = getelementptr inbounds float* %a, i64 2
> +  %2 = load float* %arrayidx2, align 4
> +  %arrayidx3 = getelementptr inbounds float* %a, i64 3
> +  %3 = load float* %arrayidx3, align 4
> +  %add4 = fadd float %2, %3
> +  %add5 = fadd float %add, %add4
> +  ret float %add5
> +}
>
>