[llvm] r224119 - This patch recognizes (+ (+ v0, v1) (+ v2, v3)), reorders them for bundling into vector of loads,
Suyog Sarda
suyog.sarda at samsung.com
Fri Dec 12 04:53:44 PST 2014
Author: suyog
Date: Fri Dec 12 06:53:44 2014
New Revision: 224119
URL: http://llvm.org/viewvc/llvm-project?rev=224119&view=rev
Log:
This patch recognizes (+ (+ v0, v1) (+ v2, v3)), reorders them for bundling into vector of loads,
and vectorizes it.
Test case :
float hadd(float* a) {
return (a[0] + a[1]) + (a[2] + a[3]);
}
AArch64 assembly before patch :
ldp s0, s1, [x0]
ldp s2, s3, [x0, #8]
fadd s0, s0, s1
fadd s1, s2, s3
fadd s0, s0, s1
ret
AArch64 assembly after patch :
ldp d0, d1, [x0]
fadd v0.2s, v0.2s, v1.2s
faddp s0, v0.2s
ret
Reviewed Link : http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20141208/248531.html
Added:
llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll
Modified:
llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=224119&r1=224118&r2=224119&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Fri Dec 12 06:53:44 2014
@@ -439,6 +439,13 @@ public:
/// \returns true if the memory operations A and B are consecutive.
bool isConsecutiveAccess(Value *A, Value *B);
+ /// For consecutive loads (+(+ v0, v1)(+ v2, v3)), Left had v0 and v2
+ /// while Right had v1 and v3, which prevented bundling them into
+ /// a vector of loads. Rorder them so that Left now has v0 and v1
+ /// while Right has v2 and v3 enabling their bundling into a vector.
+ void reorderIfConsecutiveLoads(SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right);
+
/// \brief Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
@@ -1234,6 +1241,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right);
+ reorderIfConsecutiveLoads (Left, Right);
buildTree_rec(Left, Depth + 1);
buildTree_rec(Right, Depth + 1);
return;
@@ -1818,6 +1826,19 @@ bool BoUpSLP::isConsecutiveAccess(Value
return X == PtrSCEVB;
}
+void BoUpSLP::reorderIfConsecutiveLoads(SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right) {
+ for (unsigned i = 0, e = Left.size(); i < e - 1; ++i) {
+ if (!isa<LoadInst>(Left[i]) || !isa<LoadInst>(Right[i]))
+ return;
+ if (!(isConsecutiveAccess(Left[i], Right[i]) &&
+ isConsecutiveAccess(Right[i], Left[i + 1])))
+ continue;
+ else
+ std::swap(Left[i + 1], Right[i]);
+ }
+}
+
void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
Instruction *VL0 = cast<Instruction>(VL[0]);
BasicBlock::iterator NextInst = VL0;
@@ -2048,9 +2069,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
case Instruction::Or:
case Instruction::Xor: {
ValueList LHSVL, RHSVL;
- if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
+ if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL);
- else
+ reorderIfConsecutiveLoads(LHSVL, RHSVL);
+ } else
for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
Added: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll?rev=224119&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll (added)
+++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/horizontaladd.ll Fri Dec 12 06:53:44 2014
@@ -0,0 +1,27 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu -mcpu=cortex-a57 | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; float hadd (float *a) {
+; return (a[0] + a[1]) + (a[2] + a[3]);
+; }
+
+; CHECK_LABEL: @hadd
+; CHECK: load <2 x float>*
+; CHECK: fadd <2 x float>
+; CHECK: extractelement <2 x float>
+
+define float @hadd(float* nocapture readonly %a) {
+entry:
+ %0 = load float* %a, align 4
+ %arrayidx1 = getelementptr inbounds float* %a, i64 1
+ %1 = load float* %arrayidx1, align 4
+ %add = fadd float %0, %1
+ %arrayidx2 = getelementptr inbounds float* %a, i64 2
+ %2 = load float* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds float* %a, i64 3
+ %3 = load float* %arrayidx3, align 4
+ %add4 = fadd float %2, %3
+ %add5 = fadd float %add, %add4
+ ret float %add5
+}
More information about the llvm-commits
mailing list