[llvm] r345315 - [AArch64] Create proper memoperand for multi-vector stores

Vlad Tsyrklevich via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 25 19:03:45 PDT 2018


I've reverted this commit; it was causing buildbot failures such as this one:
<http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fast/builds/25404/steps/check-llvm%20msan/logs/stdio>

On Thu, Oct 25, 2018 at 2:12 PM David Greene via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Author: greened
> Date: Thu Oct 25 14:10:39 2018
> New Revision: 345315
>
> URL: http://llvm.org/viewvc/llvm-project?rev=345315&view=rev
> Log:
> [AArch64] Create proper memoperand for multi-vector stores
>
> Include all of the store's source vector operands when creating the
> MachineMemOperand. Previously, we were missing the first operand,
> making the store size seem smaller than it really is.
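
To make the size error concrete, here is illustrative arithmetic only (not
code from the patch), using the st4 case from the new test: four <4 x float>
source vectors followed by the pointer operand.

    // Byte accounting for llvm.aarch64.neon.st4 with four <4 x float>
    // source vectors (16 bytes each). Illustrative sketch, not patch code.
    #include <cassert>

    int main() {
      const unsigned BytesPerVector = 16;                 // sizeof(<4 x float>)
      const unsigned OldMemOpBytes  = 3 * BytesPerVector; // scan started at operand 1
      const unsigned NewMemOpBytes  = 4 * BytesPerVector; // scan starts at operand 0
      assert(OldMemOpBytes == 48 && NewMemOpBytes == 64); // ST4 really writes 64 bytes
      return 0;
    }

With the old bound the MachineMemOperand claimed a 48-byte store even though
ST4Fourv4s writes 64 bytes; that is the "smaller than it really is" problem
described above.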
>
> Differential Revision: https://reviews.llvm.org/D52816
>
>
> Added:
>     llvm/trunk/test/CodeGen/AArch64/multi-vector-store-size.ll
> Modified:
>     llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=345315&r1=345314&r2=345315&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Thu Oct 25 14:10:39 2018
> @@ -7972,7 +7972,7 @@ bool AArch64TargetLowering::getTgtMemInt
>      Info.opc = ISD::INTRINSIC_VOID;
>      // Conservatively set memVT to the entire set of vectors stored.
>      unsigned NumElts = 0;
> -    for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
> +    for (unsigned ArgI = 0, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
>        Type *ArgTy = I.getArgOperand(ArgI)->getType();
>        if (!ArgTy->isVectorTy())
>          break;
>
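
For context, a sketch of how the loop above feeds the memoperand. This
paraphrases the surrounding getTgtMemIntrinsic code from memory, so details
such as how the DataLayout is obtained are assumptions, not part of the diff:

    // Sketch: every leading vector operand contributes its size in 64-bit
    // units; the accumulated count sizes the conservative memVT, which in
    // turn determines the MachineMemOperand width for the multi-vector store.
    const DataLayout &DL = I.getModule()->getDataLayout();
    unsigned NumElts = 0;
    for (unsigned ArgI = 0, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
      Type *ArgTy = I.getArgOperand(ArgI)->getType();
      if (!ArgTy->isVectorTy())
        break;                                     // stop at the pointer operand
      NumElts += DL.getTypeSizeInBits(ArgTy) / 64; // a <4 x float> adds 2
    }
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.flags = MachineMemOperand::MOStore;

With the old lower bound of 1, operand 0's two i64 units were never counted,
so memVT (and hence the store size printed in the MIR) came out one vector
short.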
> Added: llvm/trunk/test/CodeGen/AArch64/multi-vector-store-size.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/multi-vector-store-size.ll?rev=345315&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/multi-vector-store-size.ll (added)
> +++ llvm/trunk/test/CodeGen/AArch64/multi-vector-store-size.ll Thu Oct 25 14:10:39 2018
> @@ -0,0 +1,82 @@
> +; RUN: llc -mtriple=aarch64-linux-gnu -stop-after=isel < %s | FileCheck %s
> +
> +declare void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float>, <4 x float>, float*)
> +declare void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*)
> +declare void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*)
> +
> +declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*)
> +declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*)
> +declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*)
> +
> +declare void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*)
> +declare void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*)
> +declare void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*)
> +
> +define void @addstx(float* %res, <4 x float>* %a,  <4 x float>* %b, <4 x float>* %c, <4 x float>* %d) {
> +  %al = load <4 x float>, <4 x float>* %a
> +  %bl = load <4 x float>, <4 x float>* %b
> +  %cl = load <4 x float>, <4 x float>* %c
> +  %dl = load <4 x float>, <4 x float>* %d
> +
> +  %ar = fadd <4 x float> %al, %bl
> +  %br = fadd <4 x float> %bl, %cl
> +  %cr = fadd <4 x float> %cl, %dl
> +  %dr = fadd <4 x float> %dl, %al
> +
> +; The sizes below are conservative.  AArch64TargetLowering
> +; conservatively assumes the entire vector is stored.
> +  tail call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %ar, <4 x float> %br, float* %res)
> +; CHECK: ST2Twov4s {{.*}} :: (store 32 {{.*}})
> +  tail call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %ar, <4 x float> %br, <4 x float> %cr, float* %res)
> +; CHECK: ST3Threev4s {{.*}} :: (store 48 {{.*}})
> +  tail call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %ar, <4 x float> %br, <4 x float> %cr, <4 x float> %dr, float* %res)
> +; CHECK: ST4Fourv4s {{.*}} :: (store 64 {{.*}})
> +
> +  ret void
> +}
> +
> +define void @addst1x(float* %res, <4 x float>* %a,  <4 x float>* %b, <4 x float>* %c, <4 x float>* %d) {
> +  %al = load <4 x float>, <4 x float>* %a
> +  %bl = load <4 x float>, <4 x float>* %b
> +  %cl = load <4 x float>, <4 x float>* %c
> +  %dl = load <4 x float>, <4 x float>* %d
> +
> +  %ar = fadd <4 x float> %al, %bl
> +  %br = fadd <4 x float> %bl, %cl
> +  %cr = fadd <4 x float> %cl, %dl
> +  %dr = fadd <4 x float> %dl, %al
> +
> +; The sizes below are conservative.  AArch64TargetLowering
> +; conservatively assumes the entire vector is stored.
> +  tail call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %ar, <4 x float> %br, float* %res)
> +; CHECK: ST1Twov4s {{.*}} :: (store 32 {{.*}})
> +  tail call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %ar, <4 x float> %br, <4 x float> %cr, float* %res)
> +; CHECK: ST1Threev4s {{.*}} :: (store 48 {{.*}})
> +  tail call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %ar, <4 x float> %br, <4 x float> %cr, <4 x float> %dr, float* %res)
> +; CHECK: ST1Fourv4s {{.*}} :: (store 64 {{.*}})
> +
> +  ret void
> +}
> +
> +define void @addstxlane(float* %res, <4 x float>* %a,  <4 x float>* %b, <4 x float>* %c, <4 x float>* %d) {
> +  %al = load <4 x float>, <4 x float>* %a
> +  %bl = load <4 x float>, <4 x float>* %b
> +  %cl = load <4 x float>, <4 x float>* %c
> +  %dl = load <4 x float>, <4 x float>* %d
> +
> +  %ar = fadd <4 x float> %al, %bl
> +  %br = fadd <4 x float> %bl, %cl
> +  %cr = fadd <4 x float> %cl, %dl
> +  %dr = fadd <4 x float> %dl, %al
> +
> +; The sizes below are conservative.  AArch64TargetLowering
> +; conservatively assumes the entire vector is stored.
> +  tail call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %ar, <4 x float> %br, i64 1, float* %res)
> +; CHECK: ST2i32 {{.*}} :: (store 32 {{.*}})
> +  tail call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %ar, <4 x float> %br, <4 x float> %cr, i64 1, float* %res)
> +; CHECK: ST3i32 {{.*}} :: (store 48 {{.*}})
> +  tail call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %ar, <4 x float> %br, <4 x float> %cr, <4 x float> %dr, i64 1, float* %res)
> +; CHECK: ST4i32 {{.*}} :: (store 64 {{.*}})
> +
> +  ret void
> +}
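
A note on the "conservative" comments in the test above: for the st2/st3/st4
and st1x forms the conservative size equals the number of bytes the
instruction actually writes, but the lane variants are genuine
over-approximations, since an STn-lane instruction writes only one lane per
source vector. Illustrative arithmetic only (not code from the patch):

    // st2lane on <4 x float>: the instruction writes 2 lanes * 4 bytes = 8
    // bytes, while the conservative memoperand records both whole vectors,
    // 2 * 16 = 32 bytes ("store 32" in the ST2i32 CHECK line above).
    const unsigned LaneBytes        = 4;             // one f32 lane
    const unsigned ActualStoreBytes = 2 * LaneBytes; // 8
    const unsigned MemOpBytes       = 2 * 16;        // 32

Overstating the size is safe here: it can only add memory dependencies, never
hide them.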
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>

