[PATCH] D21449: Target independent codesize heuristics for Loop Idiom Recognition

Chad Rosier via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 20 10:14:04 PDT 2016


mcrosier added inline comments.

================
Comment at: test/Transforms/LoopIdiom/lir-heurs-multi-block-loop.ll:14
@@ +13,3 @@
+; CHECK-LABEL: LoopMemset
+; CHECK: memset
+;
----------------
We need stricter checks here.  My guess is that this test will pass both before and after this change.  You need to account for the hoisting of the memset and possibly the length of the memset.

E.g.,
  ; CHECK-LABEL: @LoopMemset
  ; CHECK: for.body.preheader
  ; CHECK: @llvm.memset.p0i8.i64(i8* %D1
  ; CHECK: for.body
  ; CHECK: ret

Similar changes need to be made to the other tests.


================
Comment at: test/Transforms/LoopIdiom/lir-heurs-multi-block-loop.ll:15
@@ +14,3 @@
+; CHECK: memset
+;
+define void @LoopMemset([2048 x i8]* noalias nocapture %D, i32 %N) optsize {
----------------
Sunita_Marathe wrote:
> Sunita_Marathe wrote:
> > mcrosier wrote:
> > > I applied your patch, ran this lit test, and it fails.  The input IR and the output IR after running the command line were unchanged.
> > I need to look into this because the lit test passes for me ...
> When I run the lit test in the workspace from which the uploaded patch was created, I get the following IR.  Note that:
> - For the first two functions, which are intended to check the exempted cases for avoiding LIR in multi-block loops, LIR has been done and memset calls have been inserted in the loop preheader.
> - For the third function, which is intended to check that LIR is avoided for a multi-block top-level loop, the pre and post IR are the same.
> 
> ; ModuleID = '<stdin>'
> source_filename = "<stdin>"
> 
> declare void @GF()
> 
> ; Function Attrs: argmemonly nounwind
> declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #0
> 
> ; Function Attrs: optsize
> define void @LoopMemset([2048 x i8]* noalias nocapture %D, i32 %N) #1 {
> entry:
>   %D1 = bitcast [2048 x i8]* %D to i8*
>   %cmp7 = icmp eq i32 %N, 0
>   br i1 %cmp7, label %for.end, label %for.body.preheader
> 
> for.body.preheader:                               ; preds = %entry
>   %0 = add i32 %N, -1
>   %1 = zext i32 %0 to i64
>   %2 = shl i64 %1, 11
>   %3 = add i64 %2, 2048
>   call void @llvm.memset.p0i8.i64(i8* %D1, i8 -1, i64 %3, i32 1, i1 false)
>   br label %for.body
> 
> for.body:                                         ; preds = %for.inc, %for.body.preheader
>   %i.08 = phi i32 [ %inc, %for.inc ], [ 0, %for.body.preheader ]
>   %cmp1 = icmp ugt i32 %i.08, 15
>   br i1 %cmp1, label %if.then, label %for.inc
> 
> if.then:                                          ; preds = %for.body
>   tail call void @GF()
>   br label %for.inc
> 
> for.inc:                                          ; preds = %if.then, %for.body
>   %inc = add i32 %i.08, 1
>   %cmp = icmp ult i32 %inc, %N
>   br i1 %cmp, label %for.body, label %for.end.loopexit
> 
> for.end.loopexit:                                 ; preds = %for.inc
>   br label %for.end
> 
> for.end:                                          ; preds = %for.end.loopexit, %entry
>   ret void
> }
> 
> 
> ; Function Attrs: optsize
> define void @NestedMemset_LoopMemset([2046 x i8]* noalias nocapture %D, i32 %N) #1 {
> entry:
>   %D1 = bitcast [2046 x i8]* %D to i8*
>   %cmp18 = icmp eq i32 %N, 0
>   br i1 %cmp18, label %for.end9, label %for.cond1.preheader.preheader
> 
> for.cond1.preheader.preheader:                    ; preds = %entry
>   %0 = add i32 %N, -1
>   %1 = zext i32 %0 to i64
>   %2 = mul i64 %1, 2046
>   %3 = add i64 %2, 2046
>   call void @llvm.memset.p0i8.i64(i8* %D1, i8 0, i64 %3, i32 1, i1 false)
>   br label %for.cond1.preheader
> 
> for.cond1.preheader:                              ; preds = %for.inc7, %for.cond1.preheader.preheader
>   %indvar = phi i64 [ %indvar.next, %for.inc7 ], [ 0, %for.cond1.preheader.preheader ]
>   %i.019 = phi i32 [ %inc8, %for.inc7 ], [ 0, %for.cond1.preheader.preheader ]
>   br label %for.body3
> 
> for.body3:                                        ; preds = %for.inc, %for.cond1.preheader
>   %j.017 = phi i32 [ 0, %for.cond1.preheader ], [ %inc, %for.inc ]
>   %cmp6 = icmp ugt i32 %j.017, 15
>   br i1 %cmp6, label %if.then, label %for.inc
> 
> if.then:                                          ; preds = %for.body3
>   tail call void @GF()
>   br label %for.inc
> 
> for.inc:                                          ; preds = %if.then, %for.body3
>   %inc = add i32 %j.017, 1
>   %cmp2 = icmp ult i32 %inc, 2046
>   br i1 %cmp2, label %for.body3, label %for.inc7
> 
> for.inc7:                                         ; preds = %for.inc
>   %inc8 = add i32 %i.019, 1
>   %cmp = icmp ult i32 %inc8, %N
>   %indvar.next = add i64 %indvar, 1
>   br i1 %cmp, label %for.cond1.preheader, label %for.end9.loopexit
> 
> for.end9.loopexit:                                ; preds = %for.inc7
>   br label %for.end9
> 
> for.end9:                                         ; preds = %for.end9.loopexit, %entry
>   ret void
> }
> 
> ; Function Attrs: optsize
> define void @Non_NestedMemset(i8* noalias nocapture %D, i32 %N) #1 {
> entry:
>   %cmp6 = icmp eq i32 %N, 0
>   br i1 %cmp6, label %for.end, label %for.body.preheader
> 
> for.body.preheader:                               ; preds = %entry
>   br label %for.body
> 
> for.body:                                         ; preds = %for.inc, %for.body.preheader
>   %i.07 = phi i32 [ %inc, %for.inc ], [ 0, %for.body.preheader ]
>   %idxprom = zext i32 %i.07 to i64
>   %arrayidx = getelementptr inbounds i8, i8* %D, i64 %idxprom
>   store i8 0, i8* %arrayidx, align 1
>   %cmp1 = icmp ugt i32 %i.07, 15
>   br i1 %cmp1, label %if.then, label %for.inc
> 
> if.then:                                          ; preds = %for.body
>   tail call void @GF()
>   br label %for.inc
> 
> for.inc:                                          ; preds = %if.then, %for.body
>   %inc = add i32 %i.07, 1
>   %cmp = icmp ult i32 %inc, %N
>   br i1 %cmp, label %for.body, label %for.end.loopexit
> 
> for.end.loopexit:                                 ; preds = %for.inc
>   br label %for.end
> 
> for.end:                                          ; preds = %for.end.loopexit, %entry
>   ret void
> }
> 
> 
I see what happened.  The LIR pass was ported to the new pass manager and the patch didn't apply cleanly on my end.  Would you mind rebasing the patch on top of trunk?


https://reviews.llvm.org/D21449





More information about the llvm-commits mailing list