[llvm] r210310 - Fix PR19657 (scalar loads not combined into vector load)
Karthik Bhat
kv.bhat at samsung.com
Thu Jun 5 23:20:08 PDT 2014
Author: karthik
Date: Fri Jun 6 01:20:08 2014
New Revision: 210310
URL: http://llvm.org/viewvc/llvm-project?rev=210310&view=rev
Log:
Fix PR19657 (scalar loads not combined into vector load)
If we have common uses on separate paths in the tree, process the one with the greater common depth first.
This makes sure that we do not assume we need to extract a load when it is actually going to be part of a vectorized tree.
Review: http://reviews.llvm.org/D3800
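For context, the new test below reduces to repeatedly reloading and re-adding
the same array elements. A rough source-level equivalent, reconstructed from
the unoptimized IR in the test (the exact original source is an assumption),
looks like this in C++:

void foo(double *x) {
  // Each statement loads x[i] three times and stores the sum back; after
  // SLP vectorization the scalar loads, adds and stores should become
  // <2 x double> operations, as the CHECK lines in the test expect.
  x[0] = x[0] + x[0] + x[0];
  x[1] = x[1] + x[1] + x[1];
  x[2] = x[2] + x[2] + x[2];
  x[3] = x[3] + x[3] + x[3];
}

Before this change the vectorizer could visit the shallower use of such a load
first and conclude the load had to be extracted, even though it ends up in the
vectorized tree.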
Added:
llvm/trunk/test/Transforms/SLPVectorizer/X86/pr19657.ll
Modified:
llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=210310&r1=210309&r2=210310&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Fri Jun 6 01:20:08 2014
@@ -914,8 +914,20 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right);
- buildTree_rec(Left, Depth + 1);
- buildTree_rec(Right, Depth + 1);
+ BasicBlock *LeftBB = getSameBlock(Left);
+ BasicBlock *RightBB = getSameBlock(Right);
+ // If we have common uses on separate paths in the tree, make sure we
+ // process the one with the greater common depth first.
+ // We can use block numbering to determine the subtree traversal order, as
+ // the earlier user has to come in between the common use and the later user.
+ if (LeftBB && RightBB && LeftBB == RightBB &&
+ getLastIndex(Right) > getLastIndex(Left)) {
+ buildTree_rec(Right, Depth + 1);
+ buildTree_rec(Left, Depth + 1);
+ } else {
+ buildTree_rec(Left, Depth + 1);
+ buildTree_rec(Right, Depth + 1);
+ }
return;
}
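In other words, when both operand lists live in the same basic block, the
subtree whose values occur latest in the block is built first, so that a value
shared between the two lists is already recorded as part of the tree when the
other side reaches it (per the log message above). A minimal standalone sketch
of just that ordering decision, assuming getLastIndex() yields the
block-numbering position of the latest instruction in a list; the struct and
helper names below are invented for illustration, not LLVM API:

#include <algorithm>
#include <cstdio>
#include <vector>

struct Val { const char *Name; int Index; };  // Index ~ position in the block

// Stand-in for getLastIndex(): the latest position among the list's values.
static int lastIndex(const std::vector<Val> &VL) {
  int Last = 0;
  for (const Val &V : VL)
    Last = std::max(Last, V.Index);
  return Last;
}

static void buildTree(const std::vector<Val> &VL, int Depth) {
  for (const Val &V : VL)
    std::printf("depth %d: visiting %s (block index %d)\n", Depth, V.Name,
                V.Index);
}

int main() {
  // Left and Right share the value "a"; Right also has a later user "c".
  std::vector<Val> Left  = {{"a", 3}, {"b", 4}};
  std::vector<Val> Right = {{"a", 3}, {"c", 7}};
  if (lastIndex(Right) > lastIndex(Left)) {  // build the later-used list first
    buildTree(Right, 1);
    buildTree(Left, 1);
  } else {
    buildTree(Left, 1);
    buildTree(Right, 1);
  }
  return 0;
}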
Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/pr19657.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/pr19657.ll?rev=210310&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/pr19657.ll (added)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/pr19657.ll Fri Jun 6 01:20:08 2014
@@ -0,0 +1,73 @@
+; RUN: opt < %s -O1 -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK: load <2 x double>*
+;CHECK: fadd <2 x double>
+;CHECK: store <2 x double>
+
+; Function Attrs: nounwind uwtable
+define void @foo(double* %x) #0 {
+ %1 = alloca double*, align 8
+ store double* %x, double** %1, align 8
+ %2 = load double** %1, align 8
+ %3 = getelementptr inbounds double* %2, i64 0
+ %4 = load double* %3, align 8
+ %5 = load double** %1, align 8
+ %6 = getelementptr inbounds double* %5, i64 0
+ %7 = load double* %6, align 8
+ %8 = fadd double %4, %7
+ %9 = load double** %1, align 8
+ %10 = getelementptr inbounds double* %9, i64 0
+ %11 = load double* %10, align 8
+ %12 = fadd double %8, %11
+ %13 = load double** %1, align 8
+ %14 = getelementptr inbounds double* %13, i64 0
+ store double %12, double* %14, align 8
+ %15 = load double** %1, align 8
+ %16 = getelementptr inbounds double* %15, i64 1
+ %17 = load double* %16, align 8
+ %18 = load double** %1, align 8
+ %19 = getelementptr inbounds double* %18, i64 1
+ %20 = load double* %19, align 8
+ %21 = fadd double %17, %20
+ %22 = load double** %1, align 8
+ %23 = getelementptr inbounds double* %22, i64 1
+ %24 = load double* %23, align 8
+ %25 = fadd double %21, %24
+ %26 = load double** %1, align 8
+ %27 = getelementptr inbounds double* %26, i64 1
+ store double %25, double* %27, align 8
+ %28 = load double** %1, align 8
+ %29 = getelementptr inbounds double* %28, i64 2
+ %30 = load double* %29, align 8
+ %31 = load double** %1, align 8
+ %32 = getelementptr inbounds double* %31, i64 2
+ %33 = load double* %32, align 8
+ %34 = fadd double %30, %33
+ %35 = load double** %1, align 8
+ %36 = getelementptr inbounds double* %35, i64 2
+ %37 = load double* %36, align 8
+ %38 = fadd double %34, %37
+ %39 = load double** %1, align 8
+ %40 = getelementptr inbounds double* %39, i64 2
+ store double %38, double* %40, align 8
+ %41 = load double** %1, align 8
+ %42 = getelementptr inbounds double* %41, i64 3
+ %43 = load double* %42, align 8
+ %44 = load double** %1, align 8
+ %45 = getelementptr inbounds double* %44, i64 3
+ %46 = load double* %45, align 8
+ %47 = fadd double %43, %46
+ %48 = load double** %1, align 8
+ %49 = getelementptr inbounds double* %48, i64 3
+ %50 = load double* %49, align 8
+ %51 = fadd double %47, %50
+ %52 = load double** %1, align 8
+ %53 = getelementptr inbounds double* %52, i64 3
+ store double %51, double* %53, align 8
+ ret void
+}
+
+attributes #0 = { nounwind }