[PATCH] D70068: [SLP] Enhance SLPVectorizer to vectorize vector aggregate

Fri Nov 15 11:17:15 PST 2019

vporpo added a comment.

Well, ideally we should be able to handle any combination and nesting of scalars, vectors and aggregates in a unified way.
For example:

  {float, <2 x float>, float},
  {float, float, <2 x float>},
  {{float, float}, <4 x float>, <2 x float>},
  {{float}, {float}, [2 x float]}, 
  {{{float, float}, float}, float}

etc.
This would require some redesign of the code, which is why I think it makes sense to have a single patch for all of them.

But yes, feel free to split them into separate patches.

But could you also add some more tests to show which of these cases this patch takes care of? For example, I think {<2 x float>, <2 x float>, <4 x float>} or {<2 x float>, float, float} will not work. Here are some examples:

  ; RUN: opt -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s  

  ; Builds a { <2 x float>, float, float } aggregate from four consecutive
  ; floats loaded from %Ptr: Ptr[0..1] form the vector field, Ptr[2] and Ptr[3]
  ; fill the two scalar fields.
  define { <2 x float>, float, float } @StructOfVectorAndScalars(float *%Ptr) {
    %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0
    %L0 = load float, float * %GEP0
    %GEP1 = getelementptr inbounds float, float* %Ptr, i64 1
    %L1 = load float, float * %GEP1
    %GEP2 = getelementptr inbounds float, float* %Ptr, i64 2
    %L2 = load float, float * %GEP2
    %GEP3 = getelementptr inbounds float, float* %Ptr, i64 3
    %L3 = load float, float * %GEP3

    ; Field 0: <2 x float> assembled from Ptr[0] and Ptr[1].
    %VecIn0 = insertelement <2 x float> undef, float %L0, i64 0
    %VecIn1 = insertelement <2 x float> %VecIn0, float %L1, i64 1

    ; Each field is written exactly once (indices 0, 1, 2) and the fully
    ; populated aggregate %Ret2 is returned, so none of the loads is dead.
    %Ret0 = insertvalue {<2 x float>, float, float} undef, <2 x float> %VecIn1, 0
    %Ret1 = insertvalue {<2 x float>, float, float} %Ret0, float %L2, 1
    %Ret2 = insertvalue {<2 x float>, float, float} %Ret1, float %L3, 2
    ret {<2 x float>, float, float} %Ret2
  }

  ; Builds a { <2 x float>, <2 x float>, <4 x float> } aggregate from eight
  ; consecutive floats loaded from %Ptr.
  define { <2 x float>, <2 x float>, <4 x float> } @StructOfVectors(float *%Ptr) {
    ; Ptr[0..3] feed the two <2 x float> fields.
    %Addr0 = getelementptr inbounds float, float* %Ptr, i64 0
    %Elt0 = load float, float* %Addr0
    %Addr1 = getelementptr inbounds float, float* %Ptr, i64 1
    %Elt1 = load float, float* %Addr1
    %Addr2 = getelementptr inbounds float, float* %Ptr, i64 2
    %Elt2 = load float, float* %Addr2
    %Addr3 = getelementptr inbounds float, float* %Ptr, i64 3
    %Elt3 = load float, float* %Addr3

    ; Ptr[4..7] feed the <4 x float> field.
    %Addr4 = getelementptr inbounds float, float* %Ptr, i64 4
    %Elt4 = load float, float* %Addr4
    %Addr5 = getelementptr inbounds float, float* %Ptr, i64 5
    %Elt5 = load float, float* %Addr5
    %Addr6 = getelementptr inbounds float, float* %Ptr, i64 6
    %Elt6 = load float, float* %Addr6
    %Addr7 = getelementptr inbounds float, float* %Ptr, i64 7
    %Elt7 = load float, float* %Addr7

    ; Field 0.
    %FirstPair.0 = insertelement <2 x float> undef, float %Elt0, i64 0
    %FirstPair.1 = insertelement <2 x float> %FirstPair.0, float %Elt1, i64 1

    ; Field 1.
    %SecondPair.0 = insertelement <2 x float> undef, float %Elt2, i64 0
    %SecondPair.1 = insertelement <2 x float> %SecondPair.0, float %Elt3, i64 1

    ; Field 2.
    %Quad.0 = insertelement <4 x float> undef, float %Elt4, i64 0
    %Quad.1 = insertelement <4 x float> %Quad.0, float %Elt5, i64 1
    %Quad.2 = insertelement <4 x float> %Quad.1, float %Elt6, i64 2
    %Quad.3 = insertelement <4 x float> %Quad.2, float %Elt7, i64 3

    ; Assemble the aggregate one field at a time and return it.
    %Agg0 = insertvalue {<2 x float>, <2 x float>, <4 x float>} undef, <2 x float> %FirstPair.1, 0
    %Agg1 = insertvalue {<2 x float>, <2 x float>, <4 x float>} %Agg0, <2 x float> %SecondPair.1, 1
    %Agg2 = insertvalue {<2 x float>, <2 x float>, <4 x float>} %Agg1, <4 x float> %Quad.3, 2
    ret {<2 x float>, <2 x float>, <4 x float>} %Agg2
  }

================
Comment at: llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll:1
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
----------------
Could you replace this test with the smallest test that exposes the issue?
I think something like this should do the job:

```
; RUN: opt -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s

; Checks that vector insertvalues into the struct become SLP seeds.
define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) {
  ; Load four consecutive floats, Ptr[0] through Ptr[3].
  %Addr0 = getelementptr inbounds float, float* %Ptr, i64 0
  %Elt0 = load float, float* %Addr0
  %Addr1 = getelementptr inbounds float, float* %Ptr, i64 1
  %Elt1 = load float, float* %Addr1
  %Addr2 = getelementptr inbounds float, float* %Ptr, i64 2
  %Elt2 = load float, float* %Addr2
  %Addr3 = getelementptr inbounds float, float* %Ptr, i64 3
  %Elt3 = load float, float* %Addr3

  ; Field 0: <Ptr[0], Ptr[1]>.
  %FirstPair.0 = insertelement <2 x float> undef, float %Elt0, i64 0
  %FirstPair.1 = insertelement <2 x float> %FirstPair.0, float %Elt1, i64 1

  ; Field 1: <Ptr[2], Ptr[3]>.
  %SecondPair.0 = insertelement <2 x float> undef, float %Elt2, i64 0
  %SecondPair.1 = insertelement <2 x float> %SecondPair.0, float %Elt3, i64 1

  ; Insert both vectors into the struct and return it.
  %Agg0 = insertvalue {<2 x float>, <2 x float>} undef, <2 x float> %FirstPair.1, 0
  %Agg1 = insertvalue {<2 x float>, <2 x float>} %Agg0, <2 x float> %SecondPair.1, 1
  ret {<2 x float>, <2 x float>} %Agg1
}

```

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70068/new/

https://reviews.llvm.org/D70068