[llvm-dev] loop unrolling introduces conditional branch
Xiangyang Guo via llvm-dev
llvm-dev at lists.llvm.org
Sat Aug 22 07:27:06 PDT 2015
Hi, Mehdi,
For example, I have this very simple source code:
// Writes each index into the matching array slot: array_x[i] = i for every
// i from 0 up to (but not including) n. When n <= 0 the loop body never runs.
void foo( int n, int array_x[])
{
for (int i=0; i < n; i++)
array_x[i] = i;
}
After I use "clang -emit-llvm -o bc_from_clang.bc -c try.cc", I get
bc_from_clang.bc. With my code (using the LLVM IRBuilder API), I get
bc_from_api.bc. Please find these two files attached. I have also pasted
the IR here.
******************************** Clang Generate IR Start
***********************************************************
; ModuleID = 'bc_from_clang.bc'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: nounwind uwtable
; Unoptimized form of foo(n, array_x): both arguments and the loop counter i
; live in stack slots and are reloaded from memory at every use.
define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 {
; Entry: allocate slots %1 (for n), %2 (for array_x) and %i, store the two
; arguments into their slots, and set i = 0.
%1 = alloca i32, align 4
%2 = alloca i32*, align 8
%i = alloca i32, align 4
store i32 %n, i32* %1, align 4
store i32* %array_x, i32** %2, align 8
store i32 0, i32* %i, align 4
br label %3
; Loop header: reload i and n; branch to the body while i < n (signed
; compare), otherwise branch to the return block.
; <label>:3 ; preds = %13, %0
%4 = load i32, i32* %i, align 4
%5 = load i32, i32* %1, align 4
%6 = icmp slt i32 %4, %5
br i1 %6, label %7, label %16
; Loop body: array_x[i] = i. The index is sign-extended to i64 before it is
; used in the getelementptr.
; <label>:7 ; preds = %3
%8 = load i32, i32* %i, align 4
%9 = load i32, i32* %i, align 4
%10 = sext i32 %9 to i64
%11 = load i32*, i32** %2, align 8
%12 = getelementptr inbounds i32, i32* %11, i64 %10
store i32 %8, i32* %12, align 4
br label %13
; Increment: i = i + 1, stored back to the slot. The add carries the nsw
; (no signed wrap) flag.
; <label>:13 ; preds = %7
%14 = load i32, i32* %i, align 4
%15 = add nsw i32 %14, 1
store i32 %15, i32* %i, align 4
br label %3
; Loop exit.
; <label>:16 ; preds = %3
ret void
}
attributes #0 = { nounwind uwtable "disable-tail-calls"="false"
"less-precise-fpmad"="false" "no-frame-pointer-elim"="true"
"no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false"
"no-nans-fp-math"="false" "stack-protector-buffer-size"="8"
"target-cpu"="x86-64" "target-features"="+sse,+sse2"
"unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = !{!"clang version 3.8.0 (trunk 245730) (llvm/trunk 245727)"}
******************************** Clang Generate IR End
***********************************************************
******************************** API Generate IR Start
***********************************************************
; ModuleID = 'bc_from_api.bc'
; As pasted, this module has no `target datalayout` line, and the vendor field
; of the triple below is spelled "unkown" (sic).
target triple = "x86_64-unkown-linux-gnu"
; Function Attrs: nounwind
; Unoptimized form of foo(n, array_x) built through the IRBuilder API: both
; arguments and the loop counter i live in stack slots and are reloaded from
; memory at every use.
define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 {
; Entry: allocate the three slots, store the arguments, and set i = 0.
entry:
%n.addr = alloca i32, align 4
%array_x.addr = alloca i32*, align 8
%i = alloca i32, align 4
store i32 %n, i32* %n.addr, align 4
store i32* %array_x, i32** %array_x.addr, align 8
store i32 0, i32* %i, align 4
br label %for.cond
; Loop header: reload i and n; branch to for.body while i < n (signed
; compare), otherwise branch to for.end.
for.cond: ; preds = %for.inc, %entry
%0 = load i32, i32* %i, align 4
%1 = load i32, i32* %n.addr, align 4
%cmp = icmp slt i32 %0, %1
br i1 %cmp, label %for.body, label %for.end
; Loop body: array_x[i] = i. The index is sign-extended to i64 before it is
; used in the getelementptr.
for.body: ; preds = %for.cond
%2 = load i32, i32* %i, align 4
%3 = load i32, i32* %i, align 4
%idxprom = sext i32 %3 to i64
%4 = load i32*, i32** %array_x.addr, align 8
%arrayidx = getelementptr inbounds i32, i32* %4, i64 %idxprom
store i32 %2, i32* %arrayidx, align 4
br label %for.inc
; Increment: i = i + 1, stored back to the slot. This add has no nsw/nuw
; flag.
for.inc: ; preds = %for.body
%5 = load i32, i32* %i, align 4
%inc = add i32 %5, 1
store i32 %inc, i32* %i, align 4
br label %for.cond
; Loop exit.
for.end: ; preds = %for.cond
ret void
}
; The only function attribute is nounwind; no "target-cpu" or
; "target-features" strings are attached.
attributes #0 = { nounwind }
******************************** API Generate IR End
***********************************************************
Then I use "opt file.bc -mem2reg -loops -loop-simplify -loop-rotate -lcssa
-indvars -loop-unroll -unroll-count=4 -irce -simplifycfg -S" to run both
.bc files.
The first .bc file gives me this:
***************************** Clang Generate IR with LoopUnrolling
Start**********************************************
; ModuleID = 'bc_from_clang.bc'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: nounwind uwtable
; foo(n, array_x) after the opt pipeline: a guard, a remainder loop (block %3)
; and a 4-wide main loop (.lr.ph.split.split) that uses a 64-bit index.
define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 {
; Guard: return immediately unless 0 < n (signed compare).
%1 = icmp slt i32 0, %n
br i1 %1, label %.lr.ph, label %._crit_edge
; Computes n - 1 and xtraiter = n & 3. A non-zero xtraiter sends control
; through block %3 before the main loop.
.lr.ph: ; preds = %0
%2 = add i32 %n, -1
%xtraiter = and i32 %n, 3
%lcmp.mod = icmp ne i32 %xtraiter, 0
br i1 %lcmp.mod, label %3, label %.lr.ph.split
; Remainder loop: one store per pass, array_x[iv] = trunc(iv), while
; %prol.iter counts down from xtraiter to 0. %exitcond.prol is computed but
; the branch tests %prol.iter.cmp. The back-edge carries !llvm.loop !1, which
; references !2 ("llvm.loop.unroll.disable").
; <label>:3 ; preds = %3, %.lr.ph
%indvars.iv.prol = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.prol, %3 ]
%prol.iter = phi i32 [ %xtraiter, %.lr.ph ], [ %prol.iter.sub, %3 ]
%4 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.prol
%5 = trunc i64 %indvars.iv.prol to i32
store i32 %5, i32* %4, align 4
%indvars.iv.next.prol = add nuw nsw i64 %indvars.iv.prol, 1
%lftr.wideiv.prol = trunc i64 %indvars.iv.next.prol to i32
%exitcond.prol = icmp ne i32 %lftr.wideiv.prol, %n
%prol.iter.sub = sub i32 %prol.iter, 1
%prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
br i1 %prol.iter.cmp, label %3, label %.lr.ph.split, !llvm.loop !1
; %indvars.iv.unr is the index at which the main loop starts: 0 when block %3
; was skipped, otherwise the index following its last pass. If n - 1 is
; unsigned-less-than 3, the function returns from here instead.
.lr.ph.split: ; preds = %3, %.lr.ph
%indvars.iv.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.prol, %3 ]
%6 = icmp ult i32 %2, 3
br i1 %6, label %._crit_edge, label %.lr.ph.split.split
.lr.ph.split.split: ; preds = %.lr.ph.split,
%.lr.ph.split.split
; Main loop: four stores array_x[iv + k] = trunc(iv + k), k = 0..3, per pass,
; followed by a single exit test that compares trunc(iv + 4) with n. Only
; %lftr.wideiv.3 feeds a compare; %lftr.wideiv, %lftr.wideiv.1 and
; %lftr.wideiv.2 have no visible uses.
%indvars.iv = phi i64 [ %indvars.iv.next.3, %.lr.ph.split.split ], [
%indvars.iv.unr, %.lr.ph.split ]
%7 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv
%8 = trunc i64 %indvars.iv to i32
store i32 %8, i32* %7, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%9 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.next
%10 = trunc i64 %indvars.iv.next to i32
store i32 %10, i32* %9, align 4
%indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1
%lftr.wideiv.1 = trunc i64 %indvars.iv.next.1 to i32
%11 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.next.1
%12 = trunc i64 %indvars.iv.next.1 to i32
store i32 %12, i32* %11, align 4
%indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1
%lftr.wideiv.2 = trunc i64 %indvars.iv.next.2 to i32
%13 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.next.2
%14 = trunc i64 %indvars.iv.next.2 to i32
store i32 %14, i32* %13, align 4
%indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1
%lftr.wideiv.3 = trunc i64 %indvars.iv.next.3 to i32
%exitcond.3 = icmp ne i32 %lftr.wideiv.3, %n
br i1 %exitcond.3, label %.lr.ph.split.split, label %._crit_edge
; Common return block for the guard, the n - 1 < 3 check and the main loop.
._crit_edge: ; preds = %.lr.ph.split,
%.lr.ph.split.split, %0
ret void
}
attributes #0 = { nounwind uwtable "disable-tail-calls"="false"
"less-precise-fpmad"="false" "no-frame-pointer-elim"="true"
"no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false"
"no-nans-fp-math"="false" "stack-protector-buffer-size"="8"
"target-cpu"="x86-64" "target-features"="+sse,+sse2"
"unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = !{!"clang version 3.8.0 (trunk 245730) (llvm/trunk 245727)"}
; Loop metadata attached to the back-edge of block %3.
!1 = distinct !{!1, !2}
!2 = !{!"llvm.loop.unroll.disable"}
******************************Clang Generate IR with LoopUnrolling
End***********************************************
The second .bc file gives me this:
******************************API Generate IR with LoopUnrolling
Start*************************************************
; ModuleID = 'bc_from_api.bc'
; As pasted, this module has no `target datalayout` line, and the vendor field
; of the triple below is spelled "unkown" (sic).
target triple = "x86_64-unkown-linux-gnu"
; Function Attrs: nounwind
; foo(n, array_x) after the opt pipeline: the body appears four times
; (for.body, for.body.1, for.body.2, for.body.3) and every copy ends with its
; own `icmp slt ..., %n` and conditional branch to for.end. The index stays
; i32 and is sign-extended to i64 for each getelementptr.
define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 {
; Guard: return immediately unless 0 < n (signed compare).
entry:
%cmp.1 = icmp slt i32 0, %n
br i1 %cmp.1, label %for.body, label %for.end
for.body: ; preds = %entry,
%for.body.3
; Copy 0: array_x[i] = i, where i is 0 on entry or %inc.3 from the back-edge;
; then exit unless i + 1 < n.
%i.02 = phi i32 [ %inc.3, %for.body.3 ], [ 0, %entry ]
%idxprom = sext i32 %i.02 to i64
%arrayidx = getelementptr inbounds i32, i32* %array_x, i64 %idxprom
store i32 %i.02, i32* %arrayidx, align 4
%inc = add nuw nsw i32 %i.02, 1
%cmp = icmp slt i32 %inc, %n
br i1 %cmp, label %for.body.1, label %for.end
; Common return block, reachable from the guard and from every body copy.
for.end: ; preds = %for.body,
%for.body.1, %for.body.2, %for.body.3, %entry
ret void
; Copy 1: array_x[i + 1] = i + 1; then exit unless i + 2 < n.
for.body.1: ; preds = %for.body
%idxprom.1 = sext i32 %inc to i64
%arrayidx.1 = getelementptr inbounds i32, i32* %array_x, i64 %idxprom.1
store i32 %inc, i32* %arrayidx.1, align 4
%inc.1 = add nuw nsw i32 %inc, 1
%cmp.1.3 = icmp slt i32 %inc.1, %n
br i1 %cmp.1.3, label %for.body.2, label %for.end
; Copy 2: array_x[i + 2] = i + 2; then exit unless i + 3 < n.
for.body.2: ; preds = %for.body.1
%idxprom.2 = sext i32 %inc.1 to i64
%arrayidx.2 = getelementptr inbounds i32, i32* %array_x, i64 %idxprom.2
store i32 %inc.1, i32* %arrayidx.2, align 4
%inc.2 = add nuw nsw i32 %inc.1, 1
%cmp.2 = icmp slt i32 %inc.2, %n
br i1 %cmp.2, label %for.body.3, label %for.end
; Copy 3: array_x[i + 3] = i + 3; then loop back to for.body while i + 4 < n.
for.body.3: ; preds = %for.body.2
%idxprom.3 = sext i32 %inc.2 to i64
%arrayidx.3 = getelementptr inbounds i32, i32* %array_x, i64 %idxprom.3
store i32 %inc.2, i32* %arrayidx.3, align 4
%inc.3 = add nuw nsw i32 %inc.2, 1
%cmp.3 = icmp slt i32 %inc.3, %n
br i1 %cmp.3, label %for.body, label %for.end
}
attributes #0 = { nounwind }
******************************API Generate IR with LoopUnrolling
End**************************************************
Sorry for posting so much code here. Can you give me any suggestions? Thanks
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20150822/fd12e18f/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: bc_from_clang.bc
Type: application/octet-stream
Size: 1260 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20150822/fd12e18f/attachment.obj>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: bc_from_api.bc
Type: application/octet-stream
Size: 788 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20150822/fd12e18f/attachment-0001.obj>
More information about the llvm-dev
mailing list