[llvm] r266162 - Recommit r265547, and r265610, r265639, r265657 on top of it, plus

Tue Aug 16 10:06:05 PDT 2016

Sorry about it. I just removed the stale comment at r278821.

On Tue, Aug 16, 2016 at 10:00 AM, Justin Bogner <mail at justinbogner.com> wrote:
> Wei Mi via llvm-commits <llvm-commits at lists.llvm.org> writes:
>> Author: wmi
>> Date: Tue Apr 12 22:08:27 2016
>> New Revision: 266162
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=266162&view=rev
>> Log:
>> Recommit r265547, and r265610, r265639, r265657 on top of it, plus
>> two fixes: one for an error reported by verify-regalloc, and
>> another for the live range update of a phi after rematerialization.
>>
>> r265547:
>> Replace analyzeSiblingValues with new algorithm to fix its compile
>> time issue. The patch is to solve PR17409 and its duplicates.
>>
>> analyzeSiblingValues is an N x N complexity algorithm where N is
>> the number of siblings generated by reg splitting. Although it
>> causes significant compile time issues when N is large, it is also
>> important for performance since it removes redundant spills and
>> enables rematerialization.
>>
>> To solve the compile time issue, the patch removes analyzeSiblingValues
>> and replaces it with lower cost alternatives containing two parts. The
>> first part creates a new spill hoisting method in postOptimization of
>> register allocation. It does spill hoisting at once after all the spills
>> are generated instead of inside every instance of selectOrSplit. The
>> second part queries the defining expression of the original register for
>> rematerialization and keeps it always available during register allocation
>> even if it is already dead. It deletes those dead instructions only in
>> postOptimization. With the two parts in the patch, it can remove
>> analyzeSiblingValues without sacrificing performance.
>>
>> Patches on top of r265547:
>> r265610 "Fix the compare-clang diff error introduced by r265547."
>> r265639 "Fix the sanitizer bootstrap error in r265547."
>> r265657 "InlineSpiller.cpp: Escap \@ in r265547. [-Wdocumentation]"
>>
>> Differential Revision: http://reviews.llvm.org/D15302
>> Differential Revision: http://reviews.llvm.org/D18934
>> Differential Revision: http://reviews.llvm.org/D18935
>> Differential Revision: http://reviews.llvm.org/D18936
>>
>  ...
>> Added: llvm/trunk/test/CodeGen/X86/hoist-spill.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/hoist-spill.ll?rev=266162&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/hoist-spill.ll (added)
>> +++ llvm/trunk/test/CodeGen/X86/hoist-spill.ll Tue Apr 12 22:08:27 2016
>> @@ -0,0 +1,121 @@
>> +; RUN: llc < %s | FileCheck %s
>> +
>> +; grep 'Spill' |sed 's%.*\(-[0-9]\+(\%rsp)\).*%\1%g' |sort |uniq -d |awk '{if (/rsp/); exit -1}'
>
> Is this a stale comment / something left in from when you were writing
> the test? It really isn't clear what this command that isn't being run
> is supposed to mean.
>
>> +; Check no spills to the same stack slot after hoisting.
>> +; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp)
>> +; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp)
>> +; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET3:-?[0-9]*]](%rsp)
>> +; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp)
>> +; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp)
>> +; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET3]](%rsp)
>> +
>> +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
>> +target triple = "x86_64-unknown-linux-gnu"
>> +
>> +@a = external global i32*, align 8
>> +@b = external global i32, align 4
>> +@d = external global i32*, align 8
>> +
>> +; Function Attrs: norecurse noreturn nounwind uwtable
>> +define void @fn1(i32 %p1) {
>> +entry:
>> +  %tmp = load i32*, i32** @d, align 8
>> +  %tmp1 = load i32*, i32** @a, align 8
>> +  %tmp2 = sext i32 %p1 to i64
>> +  br label %for.cond
>> +
>> +for.cond:                                         ; preds = %for.inc14, %entry
>> +  %indvar = phi i32 [ %indvar.next, %for.inc14 ], [ 0, %entry ]
>> +  %indvars.iv30.in = phi i32 [ %indvars.iv30, %for.inc14 ], [ %p1, %entry ]
>> +  %c.0 = phi i32 [ %inc15, %for.inc14 ], [ 1, %entry ]
>> +  %k.0 = phi i32 [ %k.1.lcssa, %for.inc14 ], [ undef, %entry ]
>> +  %tmp3 = icmp sgt i32 undef, 0
>> +  %smax52 = select i1 %tmp3, i32 undef, i32 0
>> +  %tmp4 = zext i32 %smax52 to i64
>> +  %tmp5 = icmp sgt i64 undef, %tmp4
>> +  %smax53 = select i1 %tmp5, i64 undef, i64 %tmp4
>> +  %tmp6 = add nsw i64 %smax53, 1
>> +  %tmp7 = sub nsw i64 %tmp6, %tmp4
>> +  %tmp8 = add nsw i64 %tmp7, -8
>> +  %tmp9 = sub i32 undef, %indvar
>> +  %tmp10 = icmp sgt i64 %tmp2, 0
>> +  %smax40 = select i1 %tmp10, i64 %tmp2, i64 0
>> +  %scevgep41 = getelementptr i32, i32* %tmp1, i64 %smax40
>> +  %indvars.iv30 = add i32 %indvars.iv30.in, -1
>> +  %tmp11 = icmp sgt i32 %indvars.iv30, 0
>> +  %smax = select i1 %tmp11, i32 %indvars.iv30, i32 0
>> +  %tmp12 = zext i32 %smax to i64
>> +  %sub = sub nsw i32 %p1, %c.0
>> +  %cmp = icmp sgt i32 %sub, 0
>> +  %sub. = select i1 %cmp, i32 %sub, i32 0
>> +  %cmp326 = icmp sgt i32 %k.0, %p1
>> +  br i1 %cmp326, label %for.cond4.preheader, label %for.body.preheader
>> +
>> +for.body.preheader:                               ; preds = %for.cond
>> +  br label %for.body
>> +
>> +for.cond4.preheader:                              ; preds = %for.body, %for.cond
>> +  %k.1.lcssa = phi i32 [ %k.0, %for.cond ], [ %add, %for.body ]
>> +  %cmp528 = icmp sgt i32 %sub., %p1
>> +  br i1 %cmp528, label %for.inc14, label %for.body6.preheader
>> +
>> +for.body6.preheader:                              ; preds = %for.cond4.preheader
>> +  br i1 undef, label %for.body6, label %min.iters.checked
>> +
>> +min.iters.checked:                                ; preds = %for.body6.preheader
>> +  br i1 undef, label %for.body6, label %vector.memcheck
>> +
>> +vector.memcheck:                                  ; preds = %min.iters.checked
>> +  %bound1 = icmp ule i32* undef, %scevgep41
>> +  %memcheck.conflict = and i1 undef, %bound1
>> +  br i1 %memcheck.conflict, label %for.body6, label %vector.body.preheader
>> +
>> +vector.body.preheader:                            ; preds = %vector.memcheck
>> +  %lcmp.mod = icmp eq i64 undef, 0
>> +  br i1 %lcmp.mod, label %vector.body.preheader.split, label %vector.body.prol
>> +
>> +vector.body.prol:                                 ; preds = %vector.body.prol, %vector.body.preheader
>> +  %prol.iter.cmp = icmp eq i64 undef, 0
>> +  br i1 %prol.iter.cmp, label %vector.body.preheader.split, label %vector.body.prol
>> +
>> +vector.body.preheader.split:                      ; preds = %vector.body.prol, %vector.body.preheader
>> +  %tmp13 = icmp ult i64 %tmp8, 24
>> +  br i1 %tmp13, label %middle.block, label %vector.body
>> +
>> +vector.body:                                      ; preds = %vector.body, %vector.body.preheader.split
>> +  %index = phi i64 [ %index.next.3, %vector.body ], [ 0, %vector.body.preheader.split ]
>> +  %index.next = add i64 %index, 8
>> +  %offset.idx.1 = add i64 %tmp12, %index.next
>> +  %tmp14 = getelementptr inbounds i32, i32* %tmp, i64 %offset.idx.1
>> +  %tmp15 = bitcast i32* %tmp14 to <4 x i32>*
>> +  %wide.load.1 = load <4 x i32>, <4 x i32>* %tmp15, align 4
>> +  %tmp16 = getelementptr inbounds i32, i32* %tmp1, i64 %offset.idx.1
>> +  %tmp17 = bitcast i32* %tmp16 to <4 x i32>*
>> +  store <4 x i32> %wide.load.1, <4 x i32>* %tmp17, align 4
>> +  %index.next.3 = add i64 %index, 32
>> +  br i1 undef, label %middle.block, label %vector.body
>> +
>> +middle.block:                                     ; preds = %vector.body, %vector.body.preheader.split
>> +  br i1 undef, label %for.inc14, label %for.body6
>> +
>> +for.body:                                         ; preds = %for.body, %for.body.preheader
>> +  %k.127 = phi i32 [ %k.0, %for.body.preheader ], [ %add, %for.body ]
>> +  %add = add nsw i32 %k.127, 1
>> +  %tmp18 = load i32, i32* undef, align 4
>> +  store i32 %tmp18, i32* @b, align 4
>> +  br i1 undef, label %for.body, label %for.cond4.preheader
>> +
>> +for.body6:                                        ; preds = %for.body6, %middle.block, %vector.memcheck, %min.iters.checked, %for.body6.preheader
>> +  %indvars.iv32 = phi i64 [ undef, %for.body6 ], [ %tmp12, %vector.memcheck ], [ %tmp12, %min.iters.checked ], [ %tmp12, %for.body6.preheader ], [ undef, %middle.block ]
>> +  %arrayidx8 = getelementptr inbounds i32, i32* %tmp, i64 %indvars.iv32
>> +  %tmp19 = load i32, i32* %arrayidx8, align 4
>> +  %arrayidx10 = getelementptr inbounds i32, i32* %tmp1, i64 %indvars.iv32
>> +  store i32 %tmp19, i32* %arrayidx10, align 4
>> +  %cmp5 = icmp slt i64 %indvars.iv32, undef
>> +  br i1 %cmp5, label %for.body6, label %for.inc14
>> +
>> +for.inc14:                                        ; preds = %for.body6, %middle.block, %for.cond4.preheader
>> +  %inc15 = add nuw nsw i32 %c.0, 1
>> +  %indvar.next = add i32 %indvar, 1
>> +  br label %for.cond
>> +}