[llvm] r228973 - Change max interleave factor to 12 for POWER7 and POWER8.

Olivier Sallenave ohsallen at us.ibm.com
Thu Feb 12 14:57:58 PST 2015


Author: ohsallen
Date: Thu Feb 12 16:57:58 2015
New Revision: 228973

URL: http://llvm.org/viewvc/llvm-project?rev=228973&view=rev
Log:
Change max interleave factor to 12 for POWER7 and POWER8.

Added:
    llvm/trunk/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

Modified: llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp?rev=228973&r1=228972&r2=228973&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp Thu Feb 12 16:57:58 2015
@@ -226,6 +226,12 @@ unsigned PPCTTIImpl::getMaxInterleaveFac
   if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
     return 1;
 
+  // For P7 and P8, floating-point instructions have a 6-cycle latency and
+  // there are two execution units, so unroll by 12x for latency hiding.
+  if (Directive == PPC::DIR_PWR7 ||
+      Directive == PPC::DIR_PWR8)
+    return 12;
+
   // For most things, modern systems have two execution units (and
   // out-of-order execution).
   return 2;

Added: llvm/trunk/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll?rev=228973&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll Thu Feb 12 16:57:58 2015
@@ -0,0 +1,41 @@
+; Check that the loop vectorizer reports an unroll (interleave) factor of 12
+; for a small floating-point reduction loop on a powerpc64le target.
+;
+; The expected message is part of opt's -debug output, which is only
+; available in builds with assertions enabled, hence the requirement below.
+; REQUIRES: asserts
+; RUN: opt < %s -loop-vectorize -S -debug 2>&1 | FileCheck %s
+
+; CHECK: LV: Unroll Factor is 12
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-ibm-linux-gnu"
+
+define void @test(double* nocapture readonly %arr, i32 signext %len) #0 {  ; sums %len doubles read from %arr
+entry:
+  %cmp4 = icmp sgt i32 %len, 0  ; guard: the loop is skipped entirely when %len <= 0
+  br i1 %cmp4, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %0 = add i32 %len, -1  ; %len - 1: index of the last element, used as the exit bound
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]  ; 64-bit index i, starting at 0
+  %redx.05 = phi double [ 0.000000e+00, %for.body.lr.ph ], [ %add, %for.body ]  ; running sum, starting at 0.0
+  %arrayidx = getelementptr inbounds double* %arr, i64 %indvars.iv  ; address of arr[i]
+  %1 = load double* %arrayidx, align 8
+  %add = fadd fast double %1, %redx.05  ; sum += arr[i]; 'fast' permits reassociating the additions
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv to i32  ; truncates the pre-increment index i (not i + 1)
+  %exitcond = icmp eq i32 %lftr.wideiv, %0  ; leave the loop once i == %len - 1
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  %add.lcssa = phi double [ %add, %for.body ]
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %redx.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %for.end.loopexit ]  ; final sum (0.0 if the loop was skipped); has no users
+  ret void
+}  ; NOTE(review): attribute group #0 is referenced by the define line but not defined in this file





More information about the llvm-commits mailing list