[llvm-dev] loop unrolling introduces conditional branch

Mehdi Amini via llvm-dev llvm-dev at lists.llvm.org
Sat Aug 22 10:17:20 PDT 2015


Something looks fishy. Can you try copying and pasting the datalayout into the .ll file?
When I did that, I got the same result from opt for both IR files.

— 
Mehdi

> On Aug 22, 2015, at 8:47 AM, Xiangyang Guo <xguo6 at ncsu.edu> wrote:
> 
> Thanks for pointing that out. I just added the DataLayout in my code with "mod->setDataLayout("e-m:e-i64:64-f80:128-n8:16:32:64-S128");", but still no luck.
> 
> I'm really confused about this. Do I need to add more passes before -loop-unroll?
> 
> On Sat, Aug 22, 2015 at 11:36 AM, Mehdi Amini <mehdi.amini at apple.com <mailto:mehdi.amini at apple.com>> wrote:
> 
>> On Aug 22, 2015, at 7:27 AM, Xiangyang Guo <xguo6 at ncsu.edu <mailto:xguo6 at ncsu.edu>> wrote:
>> 
>> Hi, Mehdi,
>> 
>> For example, I have this very simple source code:
>> void foo( int n, int array_x[])
>> {
>>     for (int i=0; i < n; i++)
>> 	    array_x[i] = i; 
>> }
>> 
>> After I run "clang -emit-llvm -o bc_from_clang.bc -c try.cc", I get bc_from_clang.bc. With my code (using the LLVM IRBuilder API), I get bc_from_api.bc. Please find these two files attached. I have also pasted the IR here.
>> ******************************** Clang Generate IR Start ***********************************************************
>> ; ModuleID = 'bc_from_clang.bc'
>> target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
>> target triple = "x86_64-unknown-linux-gnu"
>> 
>> ; Function Attrs: nounwind uwtable
>> define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 {
>>   %1 = alloca i32, align 4
>>   %2 = alloca i32*, align 8
>>   %i = alloca i32, align 4
>>   store i32 %n, i32* %1, align 4
>>   store i32* %array_x, i32** %2, align 8
>>   store i32 0, i32* %i, align 4
>>   br label %3
>> 
>> ; <label>:3                                       ; preds = %13, %0
>>   %4 = load i32, i32* %i, align 4
>>   %5 = load i32, i32* %1, align 4
>>   %6 = icmp slt i32 %4, %5
>>   br i1 %6, label %7, label %16
>> 
>> ; <label>:7                                       ; preds = %3
>>   %8 = load i32, i32* %i, align 4
>>   %9 = load i32, i32* %i, align 4
>>   %10 = sext i32 %9 to i64
>>   %11 = load i32*, i32** %2, align 8
>>   %12 = getelementptr inbounds i32, i32* %11, i64 %10
>>   store i32 %8, i32* %12, align 4
>>   br label %13
>> 
>> ; <label>:13                                      ; preds = %7
>>   %14 = load i32, i32* %i, align 4
>>   %15 = add nsw i32 %14, 1
>>   store i32 %15, i32* %i, align 4
>>   br label %3
>> 
>> ; <label>:16                                      ; preds = %3
>>   ret void
>> }
>> 
>> attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
>> 
>> !llvm.ident = !{!0}
>> 
>> !0 = !{!"clang version 3.8.0 (trunk 245730) (llvm/trunk 245727)"}
>> 
>> ******************************** Clang Generate IR End  ***********************************************************
>> 
>> ******************************** API Generate IR Start     ***********************************************************
>> ; ModuleID = 'bc_from_api.bc'
>> target triple = "x86_64-unkown-linux-gnu"
>> 
>> ; Function Attrs: nounwind
>> define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 {
>> entry:
>>   %n.addr = alloca i32, align 4
>>   %array_x.addr = alloca i32*, align 8
>>   %i = alloca i32, align 4
>>   store i32 %n, i32* %n.addr, align 4
>>   store i32* %array_x, i32** %array_x.addr, align 8
>>   store i32 0, i32* %i, align 4
>>   br label %for.cond
>> 
>> for.cond:                                         ; preds = %for.inc, %entry
>>   %0 = load i32, i32* %i, align 4
>>   %1 = load i32, i32* %n.addr, align 4
>>   %cmp = icmp slt i32 %0, %1
>>   br i1 %cmp, label %for.body, label %for.end
>> 
>> for.body:                                         ; preds = %for.cond
>>   %2 = load i32, i32* %i, align 4
>>   %3 = load i32, i32* %i, align 4
>>   %idxprom = sext i32 %3 to i64
>>   %4 = load i32*, i32** %array_x.addr, align 8
>>   %arrayidx = getelementptr inbounds i32, i32* %4, i64 %idxprom
>>   store i32 %2, i32* %arrayidx, align 4
>>   br label %for.inc
>> 
>> for.inc:                                          ; preds = %for.body
>>   %5 = load i32, i32* %i, align 4
>>   %inc = add i32 %5, 1
>>   store i32 %inc, i32* %i, align 4
>>   br label %for.cond
>> 
>> for.end:                                          ; preds = %for.cond
>>   ret void
>> }
>> 
>> attributes #0 = { nounwind }
>> 
>> ******************************** API Generate IR End      ***********************************************************
>> 
>> Then I run "opt file.bc -mem2reg -loops -loop-simplify -loop-rotate -lcssa -indvars -loop-unroll -unroll-count=4 -irce -simplifycfg -S" on both .bc files.
>> The first .bc file gives me this:
>> 
>> ***************************** Clang Generate IR with LoopUnrolling Start**********************************************
>> ; ModuleID = 'bc_from_clang.bc'
>> target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
>> target triple = "x86_64-unknown-linux-gnu"
>> 
>> ; Function Attrs: nounwind uwtable
>> define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 {
>>   %1 = icmp slt i32 0, %n
>>   br i1 %1, label %.lr.ph, label %._crit_edge
>> 
>> .lr.ph:                                           ; preds = %0
>>   %2 = add i32 %n, -1
>>   %xtraiter = and i32 %n, 3
>>   %lcmp.mod = icmp ne i32 %xtraiter, 0
>>   br i1 %lcmp.mod, label %3, label %.lr.ph.split
>> 
>> ; <label>:3                                       ; preds = %3, %.lr.ph
>>   %indvars.iv.prol = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.prol, %3 ]
>>   %prol.iter = phi i32 [ %xtraiter, %.lr.ph ], [ %prol.iter.sub, %3 ]
>>   %4 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.prol
>>   %5 = trunc i64 %indvars.iv.prol to i32
>>   store i32 %5, i32* %4, align 4
>>   %indvars.iv.next.prol = add nuw nsw i64 %indvars.iv.prol, 1
>>   %lftr.wideiv.prol = trunc i64 %indvars.iv.next.prol to i32
>>   %exitcond.prol = icmp ne i32 %lftr.wideiv.prol, %n
>>   %prol.iter.sub = sub i32 %prol.iter, 1
>>   %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
>>   br i1 %prol.iter.cmp, label %3, label %.lr.ph.split, !llvm.loop !1
>> 
>> .lr.ph.split:                                     ; preds = %3, %.lr.ph
>>   %indvars.iv.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.prol, %3 ]
>>   %6 = icmp ult i32 %2, 3
>>   br i1 %6, label %._crit_edge, label %.lr.ph.split.split
>> 
>> .lr.ph.split.split:                               ; preds = %.lr.ph.split, %.lr.ph.split.split
>>   %indvars.iv = phi i64 [ %indvars.iv.next.3, %.lr.ph.split.split ], [ %indvars.iv.unr, %.lr.ph.split ]
>>   %7 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv
>>   %8 = trunc i64 %indvars.iv to i32
>>   store i32 %8, i32* %7, align 4
>>   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
>>   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
>>   %9 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.next
>>   %10 = trunc i64 %indvars.iv.next to i32
>>   store i32 %10, i32* %9, align 4
>>   %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1
>>   %lftr.wideiv.1 = trunc i64 %indvars.iv.next.1 to i32
>>   %11 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.next.1
>>   %12 = trunc i64 %indvars.iv.next.1 to i32
>>   store i32 %12, i32* %11, align 4
>>   %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1
>>   %lftr.wideiv.2 = trunc i64 %indvars.iv.next.2 to i32
>>   %13 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.next.2
>>   %14 = trunc i64 %indvars.iv.next.2 to i32
>>   store i32 %14, i32* %13, align 4
>>   %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1
>>   %lftr.wideiv.3 = trunc i64 %indvars.iv.next.3 to i32
>>   %exitcond.3 = icmp ne i32 %lftr.wideiv.3, %n
>>   br i1 %exitcond.3, label %.lr.ph.split.split, label %._crit_edge
>> 
>> ._crit_edge:                                      ; preds = %.lr.ph.split, %.lr.ph.split.split, %0
>>   ret void
>> }
>> 
>> attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
>> 
>> !llvm.ident = !{!0}
>> 
>> !0 = !{!"clang version 3.8.0 (trunk 245730) (llvm/trunk 245727)"}
>> !1 = distinct !{!1, !2}
>> !2 = !{!"llvm.loop.unroll.disable"}
>> 
>> ******************************Clang Generate IR with LoopUnrolling End***********************************************
>> 
>> The second .bc file gives me this:
>> ******************************API Generate IR with LoopUnrolling Start*************************************************
>> ; ModuleID = 'bc_from_api.bc'
>> target triple = "x86_64-unkown-linux-gnu"
>> 
>> ; Function Attrs: nounwind
>> define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 {
>> entry:
>>   %cmp.1 = icmp slt i32 0, %n
>>   br i1 %cmp.1, label %for.body, label %for.end
>> 
>> for.body:                                         ; preds = %entry, %for.body.3
>>   %i.02 = phi i32 [ %inc.3, %for.body.3 ], [ 0, %entry ]
>>   %idxprom = sext i32 %i.02 to i64
>>   %arrayidx = getelementptr inbounds i32, i32* %array_x, i64 %idxprom
>>   store i32 %i.02, i32* %arrayidx, align 4
>>   %inc = add nuw nsw i32 %i.02, 1
>>   %cmp = icmp slt i32 %inc, %n
>>   br i1 %cmp, label %for.body.1, label %for.end
>> 
>> for.end:                                          ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
>>   ret void
>> 
>> for.body.1:                                       ; preds = %for.body
>>   %idxprom.1 = sext i32 %inc to i64
>>   %arrayidx.1 = getelementptr inbounds i32, i32* %array_x, i64 %idxprom.1
>>   store i32 %inc, i32* %arrayidx.1, align 4
>>   %inc.1 = add nuw nsw i32 %inc, 1
>>   %cmp.1.3 = icmp slt i32 %inc.1, %n
>>   br i1 %cmp.1.3, label %for.body.2, label %for.end
>> 
>> for.body.2:                                       ; preds = %for.body.1
>>   %idxprom.2 = sext i32 %inc.1 to i64
>>   %arrayidx.2 = getelementptr inbounds i32, i32* %array_x, i64 %idxprom.2
>>   store i32 %inc.1, i32* %arrayidx.2, align 4
>>   %inc.2 = add nuw nsw i32 %inc.1, 1
>>   %cmp.2 = icmp slt i32 %inc.2, %n
>>   br i1 %cmp.2, label %for.body.3, label %for.end
>> 
>> for.body.3:                                       ; preds = %for.body.2
>>   %idxprom.3 = sext i32 %inc.2 to i64
>>   %arrayidx.3 = getelementptr inbounds i32, i32* %array_x, i64 %idxprom.3
>>   store i32 %inc.2, i32* %arrayidx.3, align 4
>>   %inc.3 = add nuw nsw i32 %inc.2, 1
>>   %cmp.3 = icmp slt i32 %inc.3, %n
>>   br i1 %cmp.3, label %for.body, label %for.end
>> }
>> 
>> attributes #0 = { nounwind }
>> ******************************API Generate IR with LoopUnrolling End**************************************************
>> 
>> Sorry for posting so much code here. Can you give me any suggestions?
> 
> Yes, use an online service like pastebin :)
> 
> You haven't defined the DataLayout in the API case; doing so should help.
> 
>> Mehdi
> 
> 

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20150822/fd5ad1c5/attachment.html>


More information about the llvm-dev mailing list