[llvm] r224119 - This patch recognizes (+ (+ v0, v1) (+ v2, v3)), reorders them for bundling into vector of loads,
Sean Silva
chisophugis at gmail.com
Mon Dec 22 14:14:29 PST 2014
Did this ever get reverted?
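For context, Chandler's objection below comes down to IEEE-754 floating-point addition not being associative: the reordering re-pairs which scalars feed each fadd, so without fast-math/reassociation flags the vectorized reduction can compute a different value than the source expression. A minimal C++ sketch of the effect (hypothetical values, not taken from the patch or its tests):

#include <cstdio>

int main() {
  // a[0..3] chosen (hypothetically) so the two groupings round differently.
  float a0 = 1e8f, a1 = -1e8f, a2 = 1.0f, a3 = 1.0f;

  // Grouping as written in the source: (a0 + a1) + (a2 + a3) == 0 + 2 == 2.
  float as_written = (a0 + a1) + (a2 + a3);

  // Grouping after re-pairing the lanes: (a0 + a2) + (a1 + a3).
  // 1e8f + 1.0f rounds back to 1e8f (float spacing at 1e8 is 8),
  // so this collapses to 1e8f + (-1e8f) == 0.
  float after_reorder = (a0 + a2) + (a1 + a3);

  std::printf("as written: %f, after reorder: %f\n", as_written, after_reorder);
  return 0;
}

Whether that exact regrouping is what the committed reordering produces depends on how the lanes are reduced, but the point stands: regrouping fadds is only legal under the appropriate fast-math flags.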
On Wed, Dec 17, 2014 at 12:59 AM, Chandler Carruth <chandlerc at google.com>
wrote:
> I've sent further feedback to the review thread, but please revert this
> patch. It violates very basic principles of floating-point arithmetic and
> is triggering miscompiles.
>
> On Fri, Dec 12, 2014 at 4:53 AM, Suyog Sarda <suyog.sarda at samsung.com>
> wrote:
>>
>> Author: suyog
>> Date: Fri Dec 12 06:53:44 2014
>> New Revision: 224119
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=224119&view=rev
>> Log:
>> This patch recognizes (+ (+ v0, v1) (+ v2, v3)), reorders the operands for
>> bundling into a vector of loads,
>> and vectorizes it.
>>
>> Test case:
>>
>> float hadd(float* a) {
>>   return (a[0] + a[1]) + (a[2] + a[3]);
>> }
>>
>>
>> AArch64 assembly before patch:
>>
>> ldp s0, s1, [x0]
>> ldp s2, s3, [x0, #8]
>> fadd s0, s0, s1
>> fadd s1, s2, s3
>> fadd s0, s0, s1
>> ret
>>
>> AArch64 assembly after patch:
>>
>> ldp d0, d1, [x0]
>> fadd v0.2s, v0.2s, v1.2s
>> faddp s0, v0.2s
>> ret
>>
>> Review link:
>> http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20141208/248531.html
>>
>>
>> Added:
>> llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll
>> Modified:
>> llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
>>
>> Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=224119&r1=224118&r2=224119&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
>> +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Fri Dec 12
>> 06:53:44 2014
>> @@ -439,6 +439,13 @@ public:
>> /// \returns true if the memory operations A and B are consecutive.
>> bool isConsecutiveAccess(Value *A, Value *B);
>>
>> +  /// For consecutive loads in (+ (+ v0, v1) (+ v2, v3)), Left held v0 and
>> +  /// v2 while Right held v1 and v3, which prevented bundling them into
>> +  /// a vector of loads. Reorder them so that Left holds v0 and v1 while
>> +  /// Right holds v2 and v3, enabling their bundling into a vector.
>> +  void reorderIfConsecutiveLoads(SmallVectorImpl<Value *> &Left,
>> +                                 SmallVectorImpl<Value *> &Right);
>> +
>> /// \brief Perform LICM and CSE on the newly generated gather
>> sequences.
>> void optimizeGatherSequence();
>>
>> @@ -1234,6 +1241,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>> if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
>> ValueList Left, Right;
>> reorderInputsAccordingToOpcode(VL, Left, Right);
>> + reorderIfConsecutiveLoads(Left, Right);
>> buildTree_rec(Left, Depth + 1);
>> buildTree_rec(Right, Depth + 1);
>> return;
>> @@ -1818,6 +1826,19 @@ bool BoUpSLP::isConsecutiveAccess(Value
>> return X == PtrSCEVB;
>> }
>>
>> +void BoUpSLP::reorderIfConsecutiveLoads(SmallVectorImpl<Value *> &Left,
>> +                                        SmallVectorImpl<Value *> &Right) {
>> +  for (unsigned i = 0, e = Left.size(); i < e - 1; ++i) {
>> +    if (!isa<LoadInst>(Left[i]) || !isa<LoadInst>(Right[i]))
>> +      return;
>> +    if (!(isConsecutiveAccess(Left[i], Right[i]) &&
>> +          isConsecutiveAccess(Right[i], Left[i + 1])))
>> +      continue;
>> +    else
>> +      std::swap(Left[i + 1], Right[i]);
>> +  }
>> +}
>> +
>> void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
>> Instruction *VL0 = cast<Instruction>(VL[0]);
>> BasicBlock::iterator NextInst = VL0;
>> @@ -2048,9 +2069,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
>> case Instruction::Or:
>> case Instruction::Xor: {
>> ValueList LHSVL, RHSVL;
>> - if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
>> + if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
>> reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL);
>> - else
>> + reorderIfConsecutiveLoads(LHSVL, RHSVL);
>> + } else
>> for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
>>   LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
>>   RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
>>
>> Added: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll?rev=224119&view=auto
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll
>> (added)
>> +++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll Fri
>> Dec 12 06:53:44 2014
>> @@ -0,0 +1,27 @@
>> +; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu -mcpu=cortex-a57 | FileCheck %s
>> +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
>> +target triple = "aarch64--linux-gnu"
>> +
>> +; float hadd (float *a) {
>> +; return (a[0] + a[1]) + (a[2] + a[3]);
>> +; }
>> +
>> +; CHECK-LABEL: @hadd
>> +; CHECK: load <2 x float>*
>> +; CHECK: fadd <2 x float>
>> +; CHECK: extractelement <2 x float>
>> +
>> +define float @hadd(float* nocapture readonly %a) {
>> +entry:
>> + %0 = load float* %a, align 4
>> + %arrayidx1 = getelementptr inbounds float* %a, i64 1
>> + %1 = load float* %arrayidx1, align 4
>> + %add = fadd float %0, %1
>> + %arrayidx2 = getelementptr inbounds float* %a, i64 2
>> + %2 = load float* %arrayidx2, align 4
>> + %arrayidx3 = getelementptr inbounds float* %a, i64 3
>> + %3 = load float* %arrayidx3, align 4
>> + %add4 = fadd float %2, %3
>> + %add5 = fadd float %add, %add4
>> + ret float %add5
>> +}
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>
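To make the effect of the reorderIfConsecutiveLoads hunk above concrete, here is a small standalone C++ sketch (not LLVM code; integer indices stand in for loads of a[0]..a[3], and isConsecutive is a hypothetical stand-in for isConsecutiveAccess):

#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

// Hypothetical stand-in for BoUpSLP::isConsecutiveAccess: B is the element
// immediately after A.
static bool isConsecutive(int A, int B) { return B == A + 1; }

int main() {
  // Operand lists as reorderInputsAccordingToOpcode would produce them for
  // (a[0] + a[1]) + (a[2] + a[3]): Left = {a0, a2}, Right = {a1, a3}.
  std::vector<int> Left = {0, 2};
  std::vector<int> Right = {1, 3};

  // Same swap as the patch: if Left[i], Right[i], Left[i+1] are consecutive,
  // move the consecutive pair into the same operand list.
  for (std::size_t i = 0; i + 1 < Left.size(); ++i)
    if (isConsecutive(Left[i], Right[i]) &&
        isConsecutive(Right[i], Left[i + 1]))
      std::swap(Left[i + 1], Right[i]);

  // Now Left = {0, 1} and Right = {2, 3}: each side is a consecutive run and
  // can be loaded as one <2 x float>, which is what enables the ldp/fadd/faddp
  // sequence in the commit message -- at the cost of re-pairing the fadd
  // operands, which is the reassociation concern raised in this thread.
  std::printf("Left = {%d, %d}, Right = {%d, %d}\n",
              Left[0], Left[1], Right[0], Right[1]);
  return 0;
}

Run directly, this prints Left = {0, 1}, Right = {2, 3}.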