[LLVMdev] complex branching generation

Bill Wendling isanbard at gmail.com
Wed Oct 1 13:44:40 PDT 2008


On Wed, Oct 1, 2008 at 1:19 PM, Villmow, Micah <Micah.Villmow at amd.com> wrote:
> LLVM seems to be generating branching that is far too complex as a result
> of the short-circuit optimization. The code in question is as follows:
>
> define void @test_fc_while_and(float %x, float %y, float addrspace(11)*
> %result) nounwind  {
>
> entry:
>
>         %tobool3 = fcmp une float %x, 0.000000e+000             ; <i1>
> [#uses=1]
>
>         %tobool24 = fcmp une float %y, 0.000000e+000            ; <i1>
> [#uses=2]
>
>         %or.cond5 = and i1 %tobool3, %tobool24          ; <i1> [#uses=1]
>
>         br i1 %or.cond5, label %bb.nph, label %whileexit
>
>
>
> bb.nph:         ; preds = %entry
>
>         br i1 %tobool24, label %whilebody.us, label %whilebody
>
>
>
> whilebody.us:           ; preds = %whilebody.us, %bb.nph
>
> …code here…
>
>         br i1 %phitmp, label %whilebody.us, label %whileexit
>
>
>
> whilebody:              ; preds = %bb.nph
>
> …code here…
>
>         br label %whileexit
>
>
>
> whileexit:              ; preds = %whilebody, %whilebody.us, %entry
>
>         %z.0.lcssa = phi float [ 0.000000e+000, %entry ], [ %add, %whilebody
> ], [ %add.us, %whilebody.us ]              ; <float> [#uses=1]
>
>         store float %z.0.lcssa, float addrspace(11)* %result
>
>         ret void
>
> }
>
> based on original code of:
>
> void test_fc_while_and(float x, float y, float* result)
>
> {
>
>         float z = (float)0;
>
>         while (x && y) {
>
>         z += (x * y);
>
>         ++x
>
>         }
>
>         *result = z;
>
> }
>
>
>
> Now the problem is with the bolded code: the two comparisons and the
> `and` instruction that are mapped to the bolded while statement. What I am
> trying to figure out is why the bb.nph branch is even required and how I
> can disable it from being generated. The first branch instruction correctly
> handles the condition that I wanted, so there should be no reason that
> bb.nph is generated. The same goes for whilebody, as it shouldn't be there.
>

What optimization level are you running at? Here's what I get at -Os:

llvm-gcc -S -emit-llvm -o - a.c -Os
; ModuleID = 'a.c'
target datalayout =
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.5"

define void @test_fc_while_and(float %x, float %y, float* %result)
nounwind optsize {
entry:
	%.not7 = fcmp une float %x, 0.000000e+00		; <i1> [#uses=1]
	%0 = fcmp une float %y, 0.000000e+00		; <i1> [#uses=2]
	%or.cond8 = and i1 %.not7, %0		; <i1> [#uses=1]
	br i1 %or.cond8, label %bb, label %bb4

bb:		; preds = %bb, %entry
	%x_addr.06 = phi float [ %x, %entry ], [ %3, %bb ]		; <float> [#uses=2]
	%z.05 = phi float [ 0.000000e+00, %entry ], [ %2, %bb ]		; <float> [#uses=1]
	%1 = mul float %x_addr.06, %y		; <float> [#uses=1]
	%2 = add float %z.05, %1		; <float> [#uses=2]
	%3 = add float %x_addr.06, 1.000000e+00		; <float> [#uses=2]
	%phitmp = fcmp une float %3, 0.000000e+00		; <i1> [#uses=1]
	%or.cond = and i1 %phitmp, %0		; <i1> [#uses=1]
	br i1 %or.cond, label %bb, label %bb4

bb4:		; preds = %bb, %entry
	%z.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %2, %bb ]		;
<float> [#uses=1]
	store float %z.0.lcssa, float* %result, align 4
	ret void
}

-Os runs these passes more than -O2: -domfrontier -lcssa -loop-unroll

-bw




More information about the llvm-dev mailing list