[llvm-dev] SLP vectorizer on simple function not working

Frank Winter via llvm-dev llvm-dev at lists.llvm.org
Wed Aug 19 11:43:37 PDT 2015


The attached function should be vectorizable with a SIMD width of 4. 
However, the SLP vectorizer tells me it cannot schedule the bundle.

llvm-3.6/bin/opt -datalayout -basicaa -slp-vectorizer -debug-only SLP -S < func_simple_scalar.ll -o tmp.ll &> out

SLP: Analyzing blocks in main.
SLP: Found 4 stores to vectorize.
SLP: Analyzing a store chain of length 4.
SLP: Analyzing a store chain of length 4
SLP: Analyzing 4 stores at offset 0
SLP:  bundle:   store float %4, float* %5
SLP:  initialize schedule region to   store float %4, float* %5
SLP:  extend schedule region end to   store float %10, float* %11
SLP:  extend schedule region end to   store float %16, float* %17
SLP:  extend schedule region end to   store float %22, float* %23
SLP: try schedule bundle [  store float %4, float* %5;  store float %10, 
float* %11;  store float %16, float* %17;  store float %22, float* %23] 
in block entrypoint
SLP:       update deps of [  store float %4, float* %5;  store float 
%10, float* %11;  store float %16, float* %17;  store float %22, float* %23]
SLP:       update deps of /   store float %10, float* %11
SLP:       update deps of /   store float %16, float* %17
SLP:       update deps of /   store float %22, float* %23
SLP:       update deps of   %21 = load float* %20
SLP:       update deps of   %22 = fmul float %21, %19
SLP:       update deps of   %19 = load float* %18
SLP:       update deps of   %15 = load float* %14
SLP:       update deps of   %16 = fmul float %15, %13
SLP:       update deps of   %13 = load float* %12
SLP:       update deps of   %9 = load float* %8
SLP:       update deps of   %10 = fmul float %9, %7
SLP:       update deps of   %7 = load float* %6
SLP: We are not able to schedule this bundle!
SLP:  cancel scheduling of [  store float %4, float* %5;  store float 
%10, float* %11;  store float %16, float* %17;  store float %22, float* %23]
SLP: Calculating cost for tree of size 1.
SLP: Check whether the tree with height 1 is fully vectorizable .
SLP: Found cost=2147483647 for VF=4

What am I missing?

Thanks,
Frank
-------------- next part --------------
;; ModuleID = 'module'
target triple = "x86_64-unknown-linux-gnu"

; main(lo, hi, arg0, arg1, arg2)
;
; Straight-line, fully unrolled element-wise product of four floats:
;     arg0[i] = arg2[i] * arg1[i]      for i = 0, 1, 2, 3
; Every element is handled by the same seven-instruction group: two
; getelementptr/load pairs for the inputs, one fmul, and a
; getelementptr/store pair for the result.
;
; %lo and %hi are accepted but never referenced in the body.
;
; NOTE(review): none of the pointer arguments carries a `noalias` attribute,
; and each group's store is emitted before the next group's loads. Presumably
; alias analysis then has to assume that the store to arg0[i] may overwrite
; arg1[j]/arg2[j] for j > i, which would prevent the four stores from being
; scheduled as a single bundle -- confirm against the SLP debug output.
define void @main(i64 %lo, i64 %hi, float* %arg0, float* %arg1, float* %arg2) {
entrypoint:
  ; element 0: arg0[0] = arg2[0] * arg1[0]
  %0 = getelementptr float* %arg1, i32 0
  %1 = load float* %0
  %2 = getelementptr float* %arg2, i32 0
  %3 = load float* %2
  %4 = fmul float %3, %1
  %5 = getelementptr float* %arg0, i32 0
  store float %4, float* %5
  ; element 1: arg0[1] = arg2[1] * arg1[1]
  %6 = getelementptr float* %arg1, i32 1
  %7 = load float* %6
  %8 = getelementptr float* %arg2, i32 1
  %9 = load float* %8
  %10 = fmul float %9, %7
  %11 = getelementptr float* %arg0, i32 1
  store float %10, float* %11
  ; element 2: arg0[2] = arg2[2] * arg1[2]
  %12 = getelementptr float* %arg1, i32 2
  %13 = load float* %12
  %14 = getelementptr float* %arg2, i32 2
  %15 = load float* %14
  %16 = fmul float %15, %13
  %17 = getelementptr float* %arg0, i32 2
  store float %16, float* %17
  ; element 3: arg0[3] = arg2[3] * arg1[3]
  %18 = getelementptr float* %arg1, i32 3
  %19 = load float* %18
  %20 = getelementptr float* %arg2, i32 3
  %21 = load float* %20
  %22 = fmul float %21, %19
  %23 = getelementptr float* %arg0, i32 3
  store float %22, float* %23
  ret void
}


More information about the llvm-dev mailing list