[LLVMdev] Optimization puzzle...
Benoit Belley
Benoit.Belley at autodesk.com
Wed Mar 25 10:47:19 PDT 2015
Hi everyone,
I am wondering what¹s stopping the LLVM optimizer (opt -O3) from
eliminating the apparently useless « icmp sgt » instruction in the
following piece of LLVM IR.
> ; ModuleID = 'lambda-opt.bc'
> target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
> target triple = "x86_64-apple-macosx10.10.0"
>
> ; Function Attrs: nounwind readnone ssp uwtable
> define { <2 x float>, float } @_Z18sampleNullOperator5PointS_(i64
%pmin.coerce0, i32 %pmin.coerce1, i64 %pmax.coerce0, i32 %pmax.coerce1) #0
{
> _ZN15SamplingClosureD1Ev.exit:
> %0 = icmp sgt i32 %pmin.coerce1, %pmax.coerce1
> ret { <2 x float>, float } zeroinitializer
> }
>
> attributes #0 = { nounwind readnone ssp uwtable
"less-precise-fpmad"="false" "no-frame-pointer-elim"="true"
"no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false"
"no-nans-fp-math"="false" "stack-protector-buffer-size"="8"
"unsafe-fp-math"="false" "use-soft-float"="false" }
>
> !llvm.ident = !{!0}
>
> !0 = metadata !{metadata !"Apple LLVM version 6.0 (clang-600.0.57)
(based on LLVM 3.5svn)"}
The X86 code generation passes seems to eliminate it for the simple case
shown above. The generated assembly code is actually:
> __Z18sampleNullOperator5PointS_: ## @_Z18sampleNullOperator5PointS_
> .cfi_startproc
> ## BB#0: ## %_ZN15SamplingClosureD1Ev.exit
> push rbp
> Ltmp0:
> .cfi_def_cfa_offset 16
> Ltmp1:
> .cfi_offset rbp, -16
> mov rbp, rsp
> Ltmp2:
> .cfi_def_cfa_register rbp
> vxorps xmm0, xmm0, xmm0
> vxorps xmm1, xmm1, xmm1
> pop rbp
> ret
I am wondering because I think that it might explain why the LLVM IR code
shown below does not get simplified to a single "ret { <2 x float>, float
} zeroinitializer" instruction. It seems to me that the nested loops have
no side-effects and are guaranteed to terminate in a finite amount of
time. Unless, the nested "icmp sgt" instructions cannot be eliminated.
Of course, in that case, the X86 code generation passes can no longer do
their magic...
> ; ModuleID = 'lambda-opt.bc'
> target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
> target triple = "x86_64-apple-macosx10.10.0"
>
> ; Function Attrs: ssp uwtable
> define { <2 x float>, float } @_Z18sampleNullOperator5PointS_(i64
> %pmin.coerce0, i32 %pmin.coerce1, i64 %pmax.coerce0, i32
%pmax.coerce1) #0
> {
> %1 = icmp slt i32 %pmin.coerce1, %pmax.coerce1
> br i1 %1, label %.lr.ph35.i, label %_ZN15SamplingClosureD1Ev.exit
>
> .lr.ph35.i: ; preds = %0
> %2 = lshr i64 %pmin.coerce0, 32
> %3 = trunc i64 %2 to i32
> %4 = lshr i64 %pmax.coerce0, 32
> %5 = trunc i64 %4 to i32
> %6 = icmp slt i32 %3, %5
> %7 = trunc i64 %pmin.coerce0 to i32
> %8 = trunc i64 %pmax.coerce0 to i32
> %9 = icmp slt i32 %7, %8
> br i1 %6, label %.lr.ph23.us.i.preheader, label
> %_ZN15SamplingClosureD1Ev.exit
>
> .lr.ph23.us.i.preheader: ; preds =
%.lr.ph35.i
> %10 = trunc i64 %pmax.coerce0 to i32
> %11 = trunc i64 %pmin.coerce0 to i32
> %12 = sub i32 %10, %11
> br label %.lr.ph23.us.i
>
> ._crit_edge24.us-lcssa.us57.i: ; preds = %14,
> %.lr.ph23.us.i
> %13 = add nsw i32 %z.028.us.i, 1
> %exitcond66.i = icmp eq i32 %13, %pmax.coerce1
> br i1 %exitcond66.i, label %_ZN15SamplingClosureD1Ev.exit, label
> %.lr.ph23.us.i
>
> .lr.ph23.us.i: ; preds =
> %._crit_edge24.us-lcssa.us57.i, %.lr.ph23.us.i.preheader
> %z.028.us.i = phi i32 [ %13, %._crit_edge24.us-lcssa.us57.i ], [
> %pmin.coerce1, %.lr.ph23.us.i.preheader ]
> br i1 %9, label %.lr.ph.us.us.i, label
%._crit_edge24.us-lcssa.us57.i
>
> ; <label>:14 ; preds = %.noexc,
> %middle.block
> %15 = add nsw i32 %y.018.us.us.i, 1
> %exitcond65.i = icmp eq i32 %15, %5
> br i1 %exitcond65.i, label %._crit_edge24.us-lcssa.us57.i, label
> %.lr.ph.us.us.i
>
> .lr.ph.us.us.i: ; preds = %14,
> %.lr.ph23.us.i
> %y.018.us.us.i = phi i32 [ %15, %14 ], [ %3, %.lr.ph23.us.i ]
> %end.idx = add i32 %12, %7
> %n.vec = and i32 %12, -128
> %end.idx.rnd.down = add i32 %n.vec, %7
> %cmp.zero = icmp eq i32 %n.vec, 0
> br i1 %cmp.zero, label %middle.block, label %vector.body
>
> vector.body: ; preds =
%vector.body,
> %.lr.ph.us.us.i
> %index = phi i32 [ %index.next, %vector.body ], [ %7,
%.lr.ph.us.us.i ]
> %index.next = add i32 %index, 128
> %16 = icmp eq i32 %index.next, %end.idx.rnd.down
> br i1 %16, label %middle.block, label %vector.body, !llvm.loop !1
>
> middle.block: ; preds =
%vector.body,
> %.lr.ph.us.us.i
> %resume.val = phi i32 [ %7, %.lr.ph.us.us.i ], [ %end.idx.rnd.down,
> %vector.body ]
> %cmp.n = icmp eq i32 %end.idx, %resume.val
> br i1 %cmp.n, label %14, label %.noexc
>
> .noexc: ; preds = %.noexc,
> %middle.block
> %x.012.us.us.i = phi i32 [ %17, %.noexc ], [ %resume.val,
%middle.block ]
> %17 = add nsw i32 %x.012.us.us.i, 1
> %exitcond64.i = icmp eq i32 %17, %8
> br i1 %exitcond64.i, label %14, label %.noexc, !llvm.loop !4
>
> _ZN15SamplingClosureD1Ev.exit: ; preds =
> %._crit_edge24.us-lcssa.us57.i, %.lr.ph35.i, %0
> ret { <2 x float>, float } zeroinitializer
> }
>
> attributes #0 = { ssp uwtable "less-precise-fpmad"="false"
> "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"
> "no-infs-fp-math"="false" "no-nans-fp-math"="false"
> "stack-protector-buffer-size"="8" "unsafe-fp-math"="false"
> "use-soft-float"="false" }
>
> !llvm.ident = !{!0}
>
> !0 = metadata !{metadata !"Apple LLVM version 6.0 (clang-600.0.57)
(based
> on LLVM 3.5svn)"}
> !1 = metadata !{metadata !1, metadata !2, metadata !3}
> !2 = metadata !{metadata !"llvm.vectorizer.width", i32 1}
> !3 = metadata !{metadata !"llvm.vectorizer.unroll", i32 1}
> !4 = metadata !{metadata !4, metadata !2, metadata !3}
Thanks for you help,
Benoit
Benoit Belley
Sr Principal Developer
M&E-Product Development Group
MAIN +1 514 393 1616
DIRECT +1 438 448 6304
FAX +1 514 393 0110
Twitter <http://twitter.com/autodesk>
Facebook <https://www.facebook.com/Autodesk>
Autodesk, Inc.
10 Duke Street
Montreal, Quebec, Canada H3C 2L7
www.autodesk.com <http://www.autodesk.com/>
More information about the llvm-dev
mailing list