[PATCH][SLP-Vectorizer]Always set alignment of vectorized LD/ST in SLP-Vectorizer

Mon May 5 09:07:40 PDT 2014

LGTM. Please commit. 

It would also be a good idea to scan all other calls to setAlignment in the vectorizers and find similar bugs. 

Thanks,
Nadav

On May 5, 2014, at 8:47 AM, Yi Jiang <yjiang at apple.com> wrote:

> Hi,
> 
> This patch is to fix the run time error exposed by enabling SLP in LTO.  
> 
> This bug is about the alignment of vectorized memory operation. Now the SLP vectorizer will pass the alignment info from the scalar LD/ST to vectorized version. It is usually okay but sometimes the scalar LD/ST has no alignment at all. In this case, the vectorized LD/ST also has no alignment and the instcombine will set up the alignment according to the type. This will result in wrong alignment info for vectorized LD/ST and the Codegen may generate wrong instruction. In our fix, we set the alignment according to Scalar LD/ST's type if no alignment info. 
> 
> 
> Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
> ===================================================================
> --- lib/Transforms/Vectorize/SLPVectorizer.cpp	(revision 207849)
> +++ lib/Transforms/Vectorize/SLPVectorizer.cpp	(working copy)
> @@ -1618,6 +1618,8 @@
>                                            VecTy->getPointerTo(AS));
>      unsigned Alignment = LI->getAlignment();
>      LI = Builder.CreateLoad(VecPtr);
> +      if (!Alignment)
> +        Alignment = DL->getABITypeAlignment(LI->getPointerOperand()->getType());
>      LI->setAlignment(Alignment);
>      E->VectorizedValue = LI;
>      return propagateMetadata(LI, E->Scalars);
> @@ -1637,6 +1639,8 @@
>      Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
>                                            VecTy->getPointerTo(AS));
>      StoreInst *S = Builder.CreateStore(VecValue, VecPtr);
> +      if (!Alignment)
> +        Alignment = DL->getABITypeAlignment(SI->getPointerOperand()->getType());
>      S->setAlignment(Alignment);
>      E->VectorizedValue = S;
>      return propagateMetadata(S, E->Scalars);
> Index: test/Transforms/SLPVectorizer/X86/align.ll
> ===================================================================
> --- test/Transforms/SLPVectorizer/X86/align.ll	(revision 0)
> +++ test/Transforms/SLPVectorizer/X86/align.ll	(working copy)
> @@ -0,0 +1,27 @@
> +; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
> +
> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
> +target triple = "x86_64-apple-macosx10.8.0"
> +
> +; Simple 3-pair chain with loads and stores
> +; CHECK: test1
> +define void @test1(double* %a, double* %b, double* %c) {
> +entry:
> +  %agg.tmp.i.i.sroa.0 = alloca [3 x double], align 16
> +; CHECK: %[[V0:[0-9]+]] = load <2 x double>* %[[V2:[0-9]+]], align 8
> +  %i0 = load double* %a 
> +  %i1 = load double* %b 
> +  %mul = fmul double %i0, %i1
> +  %store1 = getelementptr inbounds [3 x double]* %agg.tmp.i.i.sroa.0, i64 0, i64 1
> +  %store2 = getelementptr inbounds [3 x double]* %agg.tmp.i.i.sroa.0, i64 0, i64 2
> +  %arrayidx3 = getelementptr inbounds double* %a, i64 1
> +  %i3 = load double* %arrayidx3, align 8
> +  %arrayidx4 = getelementptr inbounds double* %b, i64 1
> +  %i4 = load double* %arrayidx4, align 8
> +  %mul5 = fmul double %i3, %i4
> +; CHECK: store <2 x double> %[[V1:[0-9]+]], <2 x double>* %[[V2:[0-9]+]], align 8
> +  store double %mul, double* %store1
> +  store double %mul5, double* %store2, align 16
> +; CHECK: ret
> +  ret void
> +}
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits