[llvm] r205387 - [LoopVectorizer] Count dependencies of consecutive pointers as uniforms

Hal Finkel hfinkel at anl.gov
Tue Apr 1 19:34:50 PDT 2014


Author: hfinkel
Date: Tue Apr  1 21:34:49 2014
New Revision: 205387

URL: http://llvm.org/viewvc/llvm-project?rev=205387&view=rev
Log:
[LoopVectorizer] Count dependencies of consecutive pointers as uniforms

For the purpose of calculating the cost of the loop at various vectorization
factors, we need to count dependencies of consecutive pointers as uniforms
(which means that the VF = 1 cost is used for all overall VF values).

For example, the TSVC benchmark function s173 has:
  ...
  %3 = add nsw i64 %indvars.iv, 16000
  %arrayidx8 = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 0, i64 %3
  ...
and we must realize that the add will be a scalar in order to correctly deduce
it to be profitable to vectorize this on PowerPC with VSX enabled. In fact, all
dependencies of a consecutive pointer must be a scalar (uniform), and so we
simply need to add all consecutive pointers to the worklist that currently
collects uniforms.

Fixes PR19296.

Added:
    llvm/trunk/test/Transforms/LoopVectorize/PowerPC/
    llvm/trunk/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg
    llvm/trunk/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=205387&r1=205386&r2=205387&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Tue Apr  1 21:34:49 2014
@@ -3656,6 +3656,16 @@ void LoopVectorizationLegality::collectL
   // Start with the conditional branch and walk up the block.
   Worklist.push_back(Latch->getTerminator()->getOperand(0));
 
+  // Also add all consecutive pointer values; these values will be uniform
+  // after vectorization (and subsequent cleanup) and, until revectorization is
+  // supported, all dependencies must also be uniform.
+  for (Loop::block_iterator B = TheLoop->block_begin(),
+       BE = TheLoop->block_end(); B != BE; ++B)
+    for (BasicBlock::iterator I = (*B)->begin(), IE = (*B)->end();
+         I != IE; ++I)
+      if (I->getType()->isPointerTy() && isConsecutivePtr(I))
+        Worklist.insert(Worklist.end(), I->op_begin(), I->op_end());
+
   while (Worklist.size()) {
     Instruction *I = dyn_cast<Instruction>(Worklist.back());
     Worklist.pop_back();

Added: llvm/trunk/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg?rev=205387&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg Tue Apr  1 21:34:49 2014
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if not 'PowerPC' in targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll?rev=205387&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll Tue Apr  1 21:34:49 2014
@@ -0,0 +1,51 @@
+; RUN: opt < %s -mcpu=pwr7 -mattr=+vsx -loop-vectorize -instcombine -S | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.GlobalData = type { [32000 x float], [3 x i32], [4 x i8], [32000 x float], [5 x i32], [12 x i8], [32000 x float], [7 x i32], [4 x i8], [32000 x float], [11 x i32], [4 x i8], [32000 x float], [13 x i32], [12 x i8], [256 x [256 x float]], [17 x i32], [12 x i8], [256 x [256 x float]], [19 x i32], [4 x i8], [256 x [256 x float]], [23 x i32], [4 x i8], [256 x [256 x float]] }
+
+@global_data = external global %struct.GlobalData, align 16
+@ntimes = external hidden unnamed_addr global i32, align 4
+
+define signext i32 @s173() #0 {
+entry:
+  %0 = load i32* @ntimes, align 4
+  %cmp21 = icmp sgt i32 %0, 0
+  br i1 %cmp21, label %for.cond1.preheader, label %for.end12
+
+for.cond1.preheader:                              ; preds = %for.end, %entry
+  %nl.022 = phi i32 [ %inc11, %for.end ], [ 0, %entry ]
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.cond1.preheader
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
+  %arrayidx = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 0, i64 %indvars.iv
+  %1 = load float* %arrayidx, align 4
+  %arrayidx5 = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 3, i64 %indvars.iv
+  %2 = load float* %arrayidx5, align 4
+  %add = fadd float %1, %2
+  %3 = add nsw i64 %indvars.iv, 16000
+  %arrayidx8 = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 0, i64 %3
+  store float %add, float* %arrayidx8, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 16000
+  br i1 %exitcond, label %for.end, label %for.body3
+
+for.end:                                          ; preds = %for.body3
+  %inc11 = add nsw i32 %nl.022, 1
+  %4 = load i32* @ntimes, align 4
+  %mul = mul nsw i32 %4, 10
+  %cmp = icmp slt i32 %inc11, %mul
+  br i1 %cmp, label %for.cond1.preheader, label %for.end12
+
+for.end12:                                        ; preds = %for.end, %entry
+  ret i32 0
+
+; CHECK-LABEL: @s173
+; CHECK: load <4 x float>*
+; CHECK: add i64 %index, 16000
+; CHECK: ret i32 0
+}
+
+attributes #0 = { nounwind }
+





More information about the llvm-commits mailing list