[PATCH] Break dependencies in large loops containing reductions (LoopVectorize)
Olivier Sallenave
ohsallen at us.ibm.com
Tue Feb 10 10:59:07 PST 2015
Full-context patch
http://reviews.llvm.org/D7514
Files:
lib/Transforms/Vectorize/LoopVectorize.cpp
test/Transforms/LoopVectorize/PowerPC/unrolled-rdx.ll
Index: test/Transforms/LoopVectorize/PowerPC/unrolled-rdx.ll
===================================================================
--- test/Transforms/LoopVectorize/PowerPC/unrolled-rdx.ll
+++ test/Transforms/LoopVectorize/PowerPC/unrolled-rdx.ll
@@ -0,0 +1,64 @@
+; RUN: opt < %s -loop-vectorize -S -debug 2>&1 | FileCheck %s
+
+; CHECK: LV: Unrolling because of reductions.
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-ibm-linux-gnu"
+
+define void @QLA_F3_r_veq_norm2_V(float* noalias nocapture %r, [3 x { float, float }]* noalias nocapture readonly %a, i32 signext %n) #0 {
+entry:
+ %cmp24 = icmp sgt i32 %n, 0
+ br i1 %cmp24, label %for.cond1.preheader.lr.ph, label %for.end13
+
+for.cond1.preheader.lr.ph: ; preds = %entry
+ %0 = add i32 %n, -1
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.lr.ph, %for.body3
+ %indvars.iv = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+ %sum.026 = phi double [ 0.000000e+00, %for.cond1.preheader.lr.ph ], [ %add10.2, %for.body3 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader
+ %arrayidx5.realp = getelementptr inbounds [3 x { float, float }]* %a, i64 %indvars.iv, i64 0, i32 0
+ %arrayidx5.real = load float* %arrayidx5.realp, align 8
+ %arrayidx5.imagp = getelementptr inbounds [3 x { float, float }]* %a, i64 %indvars.iv, i64 0, i32 1
+ %arrayidx5.imag = load float* %arrayidx5.imagp, align 8
+ %mul = fmul fast float %arrayidx5.real, %arrayidx5.real
+ %mul9 = fmul fast float %arrayidx5.imag, %arrayidx5.imag
+ %add = fadd fast float %mul9, %mul
+ %conv = fpext float %add to double
+ %add10 = fadd fast double %conv, %sum.026
+ %arrayidx5.realp.1 = getelementptr inbounds [3 x { float, float }]* %a, i64 %indvars.iv, i64 1, i32 0
+ %arrayidx5.real.1 = load float* %arrayidx5.realp.1, align 8
+ %arrayidx5.imagp.1 = getelementptr inbounds [3 x { float, float }]* %a, i64 %indvars.iv, i64 1, i32 1
+ %arrayidx5.imag.1 = load float* %arrayidx5.imagp.1, align 8
+ %mul.1 = fmul fast float %arrayidx5.real.1, %arrayidx5.real.1
+ %mul9.1 = fmul fast float %arrayidx5.imag.1, %arrayidx5.imag.1
+ %add.1 = fadd fast float %mul9.1, %mul.1
+ %conv.1 = fpext float %add.1 to double
+ %add10.1 = fadd fast double %conv.1, %add10
+ %arrayidx5.realp.2 = getelementptr inbounds [3 x { float, float }]* %a, i64 %indvars.iv, i64 2, i32 0
+ %arrayidx5.real.2 = load float* %arrayidx5.realp.2, align 8
+ %arrayidx5.imagp.2 = getelementptr inbounds [3 x { float, float }]* %a, i64 %indvars.iv, i64 2, i32 1
+ %arrayidx5.imag.2 = load float* %arrayidx5.imagp.2, align 8
+ %mul.2 = fmul fast float %arrayidx5.real.2, %arrayidx5.real.2
+ %mul9.2 = fmul fast float %arrayidx5.imag.2, %arrayidx5.imag.2
+ %add.2 = fadd fast float %mul9.2, %mul.2
+ %conv.2 = fpext float %add.2 to double
+ %add10.2 = fadd fast double %conv.2, %add10.1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %0
+ br i1 %exitcond, label %for.cond1.preheader, label %for.cond.for.end13_crit_edge
+
+for.cond.for.end13_crit_edge: ; preds = %for.body3
+ %add10.lcssa.lcssa = phi double [ %add10.2, %for.body3 ]
+ %phitmp = fptrunc double %add10.lcssa.lcssa to float
+ br label %for.end13
+
+for.end13: ; preds = %for.cond.for.end13_crit_edge, %entry
+ %sum.0.lcssa = phi float [ %phitmp, %for.cond.for.end13_crit_edge ], [ 0.000000e+00, %entry ]
+ store float %sum.0.lcssa, float* %r, align 4
+ ret void
+}
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4647,6 +4647,14 @@
return SmallUF;
}
+ // Unroll if this is a large loop (small loops are already dealt with by this
+ // point) and there is a scalar reduction that could benefit from unrolling.
+ if (!UnrollingRequiresRuntimePointerCheck &&
+ Legal->getReductionVars()->size()) {
+ DEBUG(dbgs() << "LV: Unrolling because of reductions.\n");
+ return UF;
+ }
+
DEBUG(dbgs() << "LV: Not Unrolling.\n");
return 1;
}
EMAIL PREFERENCES
http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D7514.19696.patch
Type: text/x-patch
Size: 4373 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150210/50c4b237/attachment.bin>
More information about the llvm-commits
mailing list