[llvm] r271410 - [LV] For some IVs, use vector phis instead of widening in the loop body

Fri Jun 3 04:12:16 PDT 2016

Hi Michael,

We have seen significant performance regressions on some widely used industry benchmarks.

I've filed https://llvm.org/bugs/show_bug.cgi?id=27988 with a reproducer.

As described in the Bugzilla ticket, the generated code is poor on both x86_64 and AArch64: neither backend is able to do a proper job with the IV.

Could you have a look at it?

Kind regards,
Arnaud

> -----Original Message-----
> From: llvm-commits [mailto:llvm-commits-bounces at lists.llvm.org] On Behalf
> Of Michael Kuperstein via llvm-commits
> Sent: 01 June 2016 19:17
> To: llvm-commits at lists.llvm.org
> Subject: [llvm] r271410 - [LV] For some IVs, use vector phis instead of
> widening in the loop body
> 
> Author: mkuper
> Date: Wed Jun  1 12:16:46 2016
> New Revision: 271410
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=271410&view=rev
> Log:
> [LV] For some IVs, use vector phis instead of widening in the loop body
> 
> Previously, whenever we needed a vector IV, we would create it on the fly,
> by splatting the scalar IV and adding a step vector. Instead, we can create a
> real vector IV. This tends to save a couple of instructions per iteration.
> 
> This only changes the behavior for the most basic case - integer primary IVs
> with a constant step.
> 
> Differential Revision: http://reviews.llvm.org/D20315
> 
> Modified:
>     llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
>     llvm/trunk/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
>     llvm/trunk/test/Transforms/LoopVectorize/X86/gather_scatter.ll
>     llvm/trunk/test/Transforms/LoopVectorize/cast-induction.ll
>     llvm/trunk/test/Transforms/LoopVectorize/gcc-examples.ll
>     llvm/trunk/test/Transforms/LoopVectorize/gep_with_bitcast.ll
>     llvm/trunk/test/Transforms/LoopVectorize/global_alias.ll
>     llvm/trunk/test/Transforms/LoopVectorize/induction.ll
>     llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll
> 
> Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=271410
> &r1=271409&r2=271410&view=diff
> ==========================================================
> ====================
> --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Wed Jun  1
> +++ 12:16:46 2016
> @@ -422,6 +422,14 @@ protected:
>    /// from SCEV or creates a new using SCEVExpander.
>    virtual Value *getStepVector(Value *Val, int StartIdx, const SCEV *Step);
> 
> +  /// Create a vector induction variable based on an existing scalar one.
> +  /// Currently only works for integer primary induction variables with
> + /// a constant step.
> +  /// If TruncType is provided, instead of widening the original IV, we
> + /// widen a version of the IV truncated to TruncType.
> +  void widenInductionVariable(const InductionDescriptor &II, VectorParts
> &Entry,
> +                              IntegerType *TruncType = nullptr);
> +
>    /// When we go over instructions in the basic block we rely on previous
>    /// values within the current basic block or on loop invariant values.
>    /// When we widen (vectorize) values we place them in the map. If the
> values @@ -2099,6 +2107,40 @@ Value *InnerLoopVectorizer::getStepVecto
>    return getStepVector(Val, StartIdx, StepValue);  }
> 
> +void InnerLoopVectorizer::widenInductionVariable(const
> InductionDescriptor &II,
> +                                                 VectorParts &Entry,
> +                                                 IntegerType
> +*TruncType) {
> +  Value *Start = II.getStartValue();
> +  ConstantInt *Step = II.getConstIntStepValue();
> +  assert(Step && "Can not widen an IV with a non-constant step");
> +
> +  // Construct the initial value of the vector IV in the vector loop
> + preheader  auto CurrIP = Builder.saveIP();
> + Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
> +  if (TruncType) {
> +    Step = ConstantInt::getSigned(TruncType, Step->getSExtValue());
> +    Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
> + }  Value *SplatStart = Builder.CreateVectorSplat(VF, Start);  Value
> + *SteppedStart = getStepVector(SplatStart, 0, Step);
> + Builder.restoreIP(CurrIP);
> +
> +  Value *SplatVF =
> +      ConstantVector::getSplat(VF, ConstantInt::get(Start->getType(),
> + VF));  // We may need to add the step a number of times, depending on
> + the unroll  // factor. The last of those goes into the PHI.
> +  PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2,
> "vec.ind",
> +
> + &*LoopVectorBody->getFirstInsertionPt());
> +  Value *LastInduction = VecInd;
> +  for (unsigned Part = 0; Part < UF; ++Part) {
> +    Entry[Part] = LastInduction;
> +    LastInduction = Builder.CreateAdd(LastInduction, SplatVF,
> + "step.add");  }
> +
> +  VecInd->addIncoming(SteppedStart, LoopVectorPreHeader);
> +  VecInd->addIncoming(LastInduction, LoopVectorBody); }
> +
>  Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
>                                            Value *Step) {
>    assert(Val->getType()->isVectorTy() && "Must be a vector"); @@ -4056,19
> +4098,25 @@ void InnerLoopVectorizer::widenPHIInstru
>      llvm_unreachable("Unknown induction");
>    case InductionDescriptor::IK_IntInduction: {
>      assert(P->getType() == II.getStartValue()->getType() && "Types must
> match");
> -    // Handle other induction variables that are now based on the
> -    // canonical one.
> -    Value *V = Induction;
> -    if (P != OldInduction) {
> -      V = Builder.CreateSExtOrTrunc(Induction, P->getType());
> -      V = II.transform(Builder, V, PSE.getSE(), DL);
> -      V->setName("offset.idx");
> -    }
> -    Value *Broadcasted = getBroadcastInstrs(V);
> -    // After broadcasting the induction variable we need to make the vector
> -    // consecutive by adding 0, 1, 2, etc.
> -    for (unsigned part = 0; part < UF; ++part)
> -      Entry[part] = getStepVector(Broadcasted, VF * part, II.getStep());
> +    if (P != OldInduction || VF == 1) {
> +      Value *V = Induction;
> +      // Handle other induction variables that are now based on the
> +      // canonical one.
> +      if (P != OldInduction) {
> +        V = Builder.CreateSExtOrTrunc(Induction, P->getType());
> +        V = II.transform(Builder, V, PSE.getSE(), DL);
> +        V->setName("offset.idx");
> +      }
> +      Value *Broadcasted = getBroadcastInstrs(V);
> +      // After broadcasting the induction variable we need to make the vector
> +      // consecutive by adding 0, 1, 2, etc.
> +      for (unsigned part = 0; part < UF; ++part)
> +        Entry[part] = getStepVector(Broadcasted, VF * part, II.getStep());
> +    } else {
> +      // Instead of re-creating the vector IV by splatting the scalar IV
> +      // in each iteration, we can make a new independent vector IV.
> +      widenInductionVariable(II, Entry);
> +    }
>      return;
>    }
>    case InductionDescriptor::IK_PtrInduction:
> @@ -4239,15 +4287,23 @@ void InnerLoopVectorizer::vectorizeBlock
>        if (CI->getOperand(0) == OldInduction &&
>            it->getOpcode() == Instruction::Trunc) {
>          InductionDescriptor II =
> -          Legal->getInductionVars()->lookup(OldInduction);
> +            Legal->getInductionVars()->lookup(OldInduction);
>          if (auto StepValue = II.getConstIntStepValue()) {
> -          StepValue = ConstantInt::getSigned(cast<IntegerType>(CI-
> >getType()),
> -                                             StepValue->getSExtValue());
> -          Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
> -                                                 CI->getType());
> -          Value *Broadcasted = getBroadcastInstrs(ScalarCast);
> -          for (unsigned Part = 0; Part < UF; ++Part)
> -            Entry[Part] = getStepVector(Broadcasted, VF * Part, StepValue);
> +          IntegerType *TruncType = cast<IntegerType>(CI->getType());
> +          if (VF == 1) {
> +            StepValue =
> +                ConstantInt::getSigned(TruncType, StepValue->getSExtValue());
> +            Value *ScalarCast =
> +                Builder.CreateCast(CI->getOpcode(), Induction, CI->getType());
> +            Value *Broadcasted = getBroadcastInstrs(ScalarCast);
> +            for (unsigned Part = 0; Part < UF; ++Part)
> +              Entry[Part] = getStepVector(Broadcasted, VF * Part, StepValue);
> +          } else {
> +            // Truncating a vector induction variable on each iteration
> +            // may be expensive. Instead, truncate the initial value, and create
> +            // a new, truncated, vector IV based on that.
> +            widenInductionVariable(II, Entry, TruncType);
> +          }
>            addMetadata(Entry, &*it);
>            break;
>          }
> 
> Modified: llvm/trunk/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-
> s173.ll
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-
> s173.ll?rev=271410&r1=271409&r2=271410&view=diff
> ==========================================================
> ====================
> --- llvm/trunk/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
> (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
> +++ Wed Jun  1 12:16:46 2016
> @@ -43,7 +43,7 @@ for.end12:
> 
>  ; CHECK-LABEL: @s173
>  ; CHECK: load <4 x float>, <4 x float>* -; CHECK: add i64 %index, 16000
> +; CHECK: add nsw i64 %.lhs, 16000
>  ; CHECK: ret i32 0
>  }
> 
> 
> Modified: llvm/trunk/test/Transforms/LoopVectorize/X86/gather_scatter.ll
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/test/Transforms/LoopVectorize/X86/gather_scatter.ll?re
> v=271410&r1=271409&r2=271410&view=diff
> ==========================================================
> ====================
> --- llvm/trunk/test/Transforms/LoopVectorize/X86/gather_scatter.ll
> (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/X86/gather_scatter.ll Wed
> +++ Jun  1 12:16:46 2016
> @@ -95,7 +95,7 @@ for.end:
>  %struct.In = type { float, float }
> 
>  ;AVX512-LABEL: @foo2
> -;AVX512: getelementptr %struct.In, %struct.In* %in, <16 x i64> %induction,
> i32 1
> +;AVX512: getelementptr %struct.In, %struct.In* %in, <16 x i64> %{{.*}},
> +i32 1
>  ;AVX512: llvm.masked.gather.v16f32
>  ;AVX512: llvm.masked.store.v16f32
>  ;AVX512: ret void
> @@ -170,10 +170,10 @@ for.end:
>  ;}
> 
>  ;AVX512-LABEL: @foo3
> -;AVX512: getelementptr %struct.In, %struct.In* %in, <16 x i64> %induction,
> i32 1
> +;AVX512: getelementptr %struct.In, %struct.In* %in, <16 x i64> %{{.*}},
> +i32 1
>  ;AVX512: llvm.masked.gather.v16f32
>  ;AVX512: fadd <16 x float>
> -;AVX512: getelementptr %struct.Out, %struct.Out* %out, <16 x i64>
> %induction, i32 1
> +;AVX512: getelementptr %struct.Out, %struct.Out* %out, <16 x i64>
> +%{{.*}}, i32 1
>  ;AVX512: llvm.masked.scatter.v16f32
>  ;AVX512: ret void
> 
> 
> Modified: llvm/trunk/test/Transforms/LoopVectorize/cast-induction.ll
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/test/Transforms/LoopVectorize/cast-
> induction.ll?rev=271410&r1=271409&r2=271410&view=diff
> ==========================================================
> ====================
> --- llvm/trunk/test/Transforms/LoopVectorize/cast-induction.ll (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/cast-induction.ll Wed Jun
> +++ 1 12:16:46 2016
> @@ -8,7 +8,7 @@ target triple = "x86_64-apple-macosx10.8  @a = common
> global [2048 x i32] zeroinitializer, align 16
> 
>  ;CHECK-LABEL: @example12(
> -;CHECK: trunc i64
> +;CHECK: %vec.ind1 = phi <4 x i32>
>  ;CHECK: store <4 x i32>
>  ;CHECK: ret void
>  define void @example12() nounwind uwtable ssp {
> 
> Modified: llvm/trunk/test/Transforms/LoopVectorize/gcc-examples.ll
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/test/Transforms/LoopVectorize/gcc-
> examples.ll?rev=271410&r1=271409&r2=271410&view=diff
> ==========================================================
> ====================
> --- llvm/trunk/test/Transforms/LoopVectorize/gcc-examples.ll (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/gcc-examples.ll Wed Jun  1
> +++ 12:16:46 2016
> @@ -368,7 +368,7 @@ define void @example11() nounwind uwtabl  }
> 
>  ;CHECK-LABEL: @example12(
> -;CHECK: trunc i64
> +;CHECK: %vec.ind1 = phi <4 x i32>
>  ;CHECK: store <4 x i32>
>  ;CHECK: ret void
>  define void @example12() nounwind uwtable ssp {
> 
> Modified: llvm/trunk/test/Transforms/LoopVectorize/gep_with_bitcast.ll
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/test/Transforms/LoopVectorize/gep_with_bitcast.ll?rev=
> 271410&r1=271409&r2=271410&view=diff
> ==========================================================
> ====================
> --- llvm/trunk/test/Transforms/LoopVectorize/gep_with_bitcast.ll (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/gep_with_bitcast.ll Wed Jun
> +++ 1 12:16:46 2016
> @@ -12,10 +12,11 @@ target datalayout = "e-m:e-i64:64-i128:1
> 
>  ; CHECK-LABEL: @foo
>  ; CHECK: vector.body
> -; CHECK:  %0 = getelementptr inbounds double*, double** %in, i64 %index -
> ; CHECK:  %1 = bitcast double** %0 to <4 x i64>* -; CHECK:  %wide.load = load
> <4 x i64>, <4 x i64>* %1, align 8 -; CHECK:  %2 = icmp eq <4 x i64> %wide.load,
> zeroinitializer
> +; CHECK:  %0 = phi
> +; CHECK:  %2 = getelementptr inbounds double*, double** %in, i64 %0 ;
> +CHECK:  %3 = bitcast double** %2 to <4 x i64>* ; CHECK:  %wide.load =
> +load <4 x i64>, <4 x i64>* %3, align 8 ; CHECK:  %4 = icmp eq <4 x i64>
> +%wide.load, zeroinitializer
>  ; CHECK:  br i1
> 
>  define void @foo(double** noalias nocapture readonly %in, double**
> noalias nocapture readnone %out, i8* noalias nocapture %res) #0 { @@ -37,4
> +38,4 @@ for.body:
> 
>  for.end:
>    ret void
> -}
> \ No newline at end of file
> +}
> 
> Modified: llvm/trunk/test/Transforms/LoopVectorize/global_alias.ll
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/test/Transforms/LoopVectorize/global_alias.ll?rev=27141
> 0&r1=271409&r2=271410&view=diff
> ==========================================================
> ====================
> --- llvm/trunk/test/Transforms/LoopVectorize/global_alias.ll (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/global_alias.ll Wed Jun  1
> +++ 12:16:46 2016
> @@ -12,7 +12,7 @@ target datalayout = "e-p:32:32:32-i1:8:8  @PA = external
> global i32*
> 
> 
> -;; === First, the tests that should always vectorize, wither statically or by
> adding run-time checks ===
> +;; === First, the tests that should always vectorize, whether
> +statically or by adding run-time checks ===
> 
> 
>  ; /// Different objects, positive induction, constant distance @@ -387,7
> +387,7 @@ for.end:
>  ;   return Foo.A[a];
>  ; }
>  ; CHECK-LABEL: define i32 @noAlias08(
> -; CHECK: sub <4 x i32>
> +; CHECK: sub nuw nsw <4 x i32>
>  ; CHECK: ret
> 
>  define i32 @noAlias08(i32 %a) #0 {
> @@ -439,7 +439,7 @@ for.end:
>  ;   return Foo.A[a];
>  ; }
>  ; CHECK-LABEL: define i32 @noAlias09(
> -; CHECK: sub <4 x i32>
> +; CHECK: sub nuw nsw <4 x i32>
>  ; CHECK: ret
> 
>  define i32 @noAlias09(i32 %a) #0 {
> @@ -721,7 +721,7 @@ for.end:
>  ;   return Foo.A[a];
>  ; }
>  ; CHECK-LABEL: define i32 @noAlias14(
> -; CHECK: sub <4 x i32>
> +; CHECK: sub nuw nsw <4 x i32>
>  ; CHECK: ret
> 
>  define i32 @noAlias14(i32 %a) #0 {
> 
> Modified: llvm/trunk/test/Transforms/LoopVectorize/induction.ll
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/test/Transforms/LoopVectorize/induction.ll?rev=271410
> &r1=271409&r2=271410&view=diff
> ==========================================================
> ====================
> --- llvm/trunk/test/Transforms/LoopVectorize/induction.ll (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/induction.ll Wed Jun  1
> +++ 12:16:46 2016
> @@ -1,4 +1,6 @@
>  ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-
> width=2 -S | FileCheck %s
> +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1
> +-force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
> +; RUN: opt < %s -loop-vectorize -force-vector-interleave=2
> +-force-vector-width=2 -instcombine -S | FileCheck %s
> +--check-prefix=UNROLL
> 
>  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-
> f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-
> n8:16:32:64-S128"
> 
> @@ -27,8 +29,6 @@ for.end:
>    ret void
>  }
> 
> -; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-
> width=2 -instcombine -S | FileCheck %s --check-prefix=IND
> -
>  ; Make sure we remove unneeded vectorization of induction variables.
>  ; In order for instcombine to cleanup the vectorized induction variables that
> we  ; create in the loop vectorizer we need to perform some form of
> redundancy @@ -241,3 +241,64 @@ entry:
>   exit:
>    ret void
>  }
> +
> +; Check that we generate vectorized IVs in the pre-header ; instead of
> +widening the scalar IV inside the loop, when ; we know how to do that.
> +; IND-LABEL: veciv
> +; IND: vector.body:
> +; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body
> +] ; IND: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [
> +%step.add, %vector.body ] ; IND: %step.add = add <2 x i32> %vec.ind,
> +<i32 2, i32 2> ; IND: %index.next = add i32 %index, 2 ; IND:
> +%[[CMP:.*]] = icmp eq i32 %index.next ; IND: br i1 %[[CMP]] ;
> +UNROLL-LABEL: veciv ; UNROLL: vector.body:
> +; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next,
> +%vector.body ] ; UNROLL: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>,
> +%vector.ph ], [ %step.add1, %vector.body ] ; UNROLL: %step.add = add <2
> +x i32> %vec.ind, <i32 2, i32 2> ; UNROLL: %step.add1 = add <2 x i32>
> +%vec.ind, <i32 4, i32 4> ; UNROLL: %index.next = add i32 %index, 4 ;
> +UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next ; UNROLL: br i1 %[[CMP]]
> +define void @veciv(i32* nocapture %a, i32 %start, i32 %k) {
> +for.body.preheader:
> +  br label %for.body
> +
> +for.body:
> +  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0,
> +%for.body.preheader ]
> +  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
> +  store i32 %indvars.iv, i32* %arrayidx, align 4
> +  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
> +  %exitcond = icmp eq i32 %indvars.iv.next, %k
> +  br i1 %exitcond, label %exit, label %for.body
> +
> +exit:
> +  ret void
> +}
> +
> +; IND-LABEL: trunciv
> +; IND: vector.body:
> +; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body
> +] ; IND: %[[VECIND:.*]] = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ],
> +[ %[[STEPADD:.*]], %vector.body ] ; IND: %[[STEPADD]] = add <2 x i32>
> +%[[VECIND]], <i32 2, i32 2> ; IND: %index.next = add i64 %index, 2 ;
> +IND: %[[CMP:.*]] = icmp eq i64 %index.next ; IND: br i1 %[[CMP]] define
> +void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
> +for.body.preheader:
> +  br label %for.body
> +
> +for.body:
> +  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0,
> +%for.body.preheader ]
> +  %trunc.iv = trunc i64 %indvars.iv to i32
> +  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %trunc.iv
> +  store i32 %trunc.iv, i32* %arrayidx, align 4
> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
> +  %exitcond = icmp eq i64 %indvars.iv.next, %k
> +  br i1 %exitcond, label %exit, label %for.body
> +
> +exit:
> +  ret void
> +}
> 
> Modified: llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll?rev=27
> 1410&r1=271409&r2=271410&view=diff
> ==========================================================
> ====================
> --- llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll Wed Jun
> +++ 1 12:16:46 2016
> @@ -1,4 +1,4 @@
> -; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-
> width=4 -instcombine -S | FileCheck %s
> +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1
> +-force-vector-width=4 -S | FileCheck %s
> 
>  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-
> f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-
> n8:16:32:64-S128"
>  target triple = "x86_64-apple-macosx10.8.0"
> @@ -6,8 +6,11 @@ target triple = "x86_64-apple-macosx10.8  @array =
> common global [1024 x i32] zeroinitializer, align 16
> 
>  ;CHECK-LABEL: @array_at_plus_one(
> -;CHECK: add i64 %index, 12
> -;CHECK: trunc i64
> +;CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body
> +]
> +;CHECK: %vec.ind = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>,
> +%vector.ph ], [ %step.add, %vector.body ]
> +;CHECK: %vec.ind1 = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>,
> +%vector.ph ], [ %step.add2, %vector.body ]
> +;CHECK: add <4 x i64> %vec.ind, <i64 4, i64 4, i64 4, i64 4>
> +;CHECK: add nsw <4 x i64> %vec.ind, <i64 12, i64 12, i64 12, i64 12>
>  ;CHECK: ret i32
>  define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp {
>    %1 = icmp sgt i32 %n, 0
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits