[LLVMdev] RegisterCoalescing Pass seems to ignore part of CFG.

Vincent Lejeune vljn at ovi.com
Wed Oct 24 14:26:10 PDT 2012


Hi,

I don't know whether my LLVM IR code is faulty or whether I have spotted a bug in the RegisterCoalescing pass, so I'm posting my issue on the mailing list. The shader and the print-before-all dump are given below.

The interesting part is the vreg6/vreg48 reduction: before RegisterCoalescing, the machine code is:

// BEFORE LOOP
... Some COPYs....
400B%vreg47<def> = COPY %vreg2<kill>; R600_Reg32:%vreg47,%vreg2
416B%vreg48<def> = COPY %vreg3<kill>; R600_Reg128:%vreg48,%vreg3
432B%vreg49<def> = COPY %vreg13<kill>; R600_Reg32:%vreg49,%vreg13
    Successors according to CFG: BB#1


// LOOP CONDITION
464B%vreg5<def> = COPY %vreg47<kill>; R600_Reg32:%vreg5,%vreg47
480B%vreg6<def> = COPY %vreg48<kill>; R600_Reg128:%vreg6,%vreg48
496B%vreg7<def> = COPY %vreg49<kill>; R600_Reg32:%vreg7,%vreg49
512B%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
528B%vreg30<def> = COPY %vreg29<kill>; R600_Reg32:%vreg30,%vreg29
544B%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; R600_Reg32:%vreg30
560BJUMP <BB#3>, pred:%PREDICATE_BIT
576BJUMP <BB#2>, pred:%noreg

// LOOP BODY
896B%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
912B%vreg32:sel_x<def,read-undef> = COPY %vreg31<kill>; R600_Reg128:%vreg32 R600_Reg32:%vreg31
928B%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
944B%vreg35<def> = COPY %vreg32<kill>; R600_Reg128:%vreg35,%vreg32
960B%vreg35:sel_y<def> = COPY %vreg34<kill>; R600_Reg128:%vreg35 R600_Reg32:%vreg34
976B%vreg36<def> = COPY %vreg35<kill>; R600_Reg128:%vreg36,%vreg35
992B%vreg36:sel_z<def> = COPY %vreg5<kill>; R600_Reg128:%vreg36 R600_Reg32:%vreg5
1008B%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
1024B%vreg10<def> = COPY %vreg36<kill>; R600_Reg128:%vreg10,%vreg36
1040B%vreg10:sel_w<def> = COPY %vreg37<kill>; R600_Reg128:%vreg10 R600_Reg32:%vreg37
1056B%vreg9<def> = COPY %vreg6:sel_z<kill>; R600_Reg32:%vreg9 R600_Reg128:%vreg6
1072B%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
1088B%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
1104B%vreg47<def> = COPY %vreg9<kill>; R600_Reg32:%vreg47,%vreg9
1120B%vreg48<def> = COPY %vreg10<kill>; R600_Reg128:%vreg48,%vreg10
1136B%vreg49<def> = COPY %vreg11<kill>; R600_Reg32:%vreg49,%vreg11
1152BJUMP <BB#1>, pred:%noreg

// EXPORTED VALUES
608B%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
624B%T2_X<def> = COPY %vreg39<kill>; R600_Reg32:%vreg39
640B%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
656B%T2_Y<def> = COPY %vreg40<kill>; R600_Reg32:%vreg40
672B%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
688B%T2_Z<def> = COPY %vreg41<kill>; R600_Reg32:%vreg41
704B%vreg42<def> = COPY %vreg6:sel_w<kill>; R600_Reg32:%vreg42 R600_Reg128:%vreg6
720B%T2_W<def> = COPY %vreg42<kill>; R600_Reg32:%vreg42

And after the pass:

//Before Loop
...Some COPYs...
128B%vreg27:sel_x<def,read-undef> = COPY %C1_X; R600_Reg128:%vreg27
192B%vreg27:sel_y<def> = COPY %C1_Y; R600_Reg128:%vreg27
272B%vreg27:sel_z<def> = COPY %C1_Z; R600_Reg128:%vreg27
320B%vreg27:sel_w<def> = COPY %C1_W; R600_Reg128:%vreg27

//LOOP CONDITION
512B%vreg30<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %C0_X, 0, 0, 0, %vreg49, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg30,%vreg49
544B%PREDICATE_BIT<def> = PRED_X %vreg30, 152, 16; R600_Reg32:%vreg30
560BJUMP <BB#3>, pred:%PREDICATE_BIT
576BJUMP <BB#2>, pred:%noreg

//LOOP BODY
1072B%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
1088B%vreg49<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg49, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg49,%vreg38
1152BJUMP <BB#1>, pred:%noreg

// EXPORTED VALUES
624B%T2_X<def> = COPY %vreg27:sel_x; R600_Reg128:%vreg27
656B%T2_Y<def> = COPY %vreg27:sel_y; R600_Reg128:%vreg27
688B%T2_Z<def> = COPY %vreg27:sel_z; R600_Reg128:%vreg27
720B%T2_W<def> = COPY %vreg27:sel_w; R600_Reg128:%vreg27


Apparently, the pass assumed that the code is still in SSA form and joined vreg6 with vreg27, thus ignoring the loop body block, which can modify vreg48's value and therefore vreg6's value.
I don't know whether I should manually tell the pass that the code is no longer in SSA form (I assume that a previous pass, like the two-address simplification pass, does that), whether I am missing something in my LLVM IR,
or whether it's a bug.

Regards,
Vincent

The LLVM IR is the following:
____________________________________________________
; ModuleID = 'glsl-to-llvm'

define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.AMDGPU.load.const(i32 0)
  %2 = bitcast float %1 to i32
  %3 = call float @llvm.AMDGPU.load.const(i32 4)
  %4 = insertelement <4 x float> undef, float %3, i32 0
  %5 = call float @llvm.AMDGPU.load.const(i32 5)
  %6 = insertelement <4 x float> %4, float %5, i32 1
  %7 = call float @llvm.AMDGPU.load.const(i32 6)
  %8 = insertelement <4 x float> %6, float %7, i32 2
  %9 = call float @llvm.AMDGPU.load.const(i32 7)
  %10 = insertelement <4 x float> %8, float %9, i32 3
  %11 = call float @llvm.R600.load.input(i32 4)
  %12 = insertelement <4 x float> undef, float %11, i32 0
  %13 = call float @llvm.R600.load.input(i32 5)
  %14 = insertelement <4 x float> %12, float %13, i32 1
  %15 = call float @llvm.R600.load.input(i32 6)
  %16 = insertelement <4 x float> %14, float %15, i32 2
  %17 = call float @llvm.R600.load.input(i32 7)
  %18 = insertelement <4 x float> %16, float %17, i32 3
  %19 = shufflevector <4 x float> %10, <4 x float> %10, <1 x i32> <i32 1>
  %20 = extractelement <1 x float> %19, i32 0
  %21 = insertelement <1 x float> undef, float %20, i32 0
  %22 = shufflevector <1 x float> undef, <1 x float> %21, <1 x i32> <i32 1>
  %23 = extractelement <1 x float> %22, i32 0
  %24 = shufflevector <4 x float> undef, <4 x float> %10, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  br label %25

; <label>:25                                      ; preds = %41, %0
  %26 = phi float [ %45, %41 ], [ undef, %0 ]
  %27 = phi float [ %50, %41 ], [ %23, %0 ]
  %28 = phi <4 x float> [ %53, %41 ], [ %24, %0 ]
  %29 = phi i32 [ %58, %41 ], [ 0, %0 ]
  %30 = icmp sge i32 %29, %2
  br i1 %30, label %31, label %41

; <label>:31                                      ; preds = %25
  %32 = shufflevector <4 x float> undef, <4 x float> %18, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %33 = extractelement <4 x float> %28, i32 0
  call void @llvm.AMDGPU.store.output(float %33, i32 8)
  %34 = extractelement <4 x float> %28, i32 1
  call void @llvm.AMDGPU.store.output(float %34, i32 9)
  %35 = extractelement <4 x float> %28, i32 2
  call void @llvm.AMDGPU.store.output(float %35, i32 10)
  %36 = extractelement <4 x float> %28, i32 3
  call void @llvm.AMDGPU.store.output(float %36, i32 11)
  %37 = extractelement <4 x float> %32, i32 0
  call void @llvm.AMDGPU.store.output(float %37, i32 4)
  %38 = extractelement <4 x float> %32, i32 1
  call void @llvm.AMDGPU.store.output(float %38, i32 5)
  %39 = extractelement <4 x float> %32, i32 2
  call void @llvm.AMDGPU.store.output(float %39, i32 6)
  %40 = extractelement <4 x float> %32, i32 3
  call void @llvm.AMDGPU.store.output(float %40, i32 7)
  ret void

; <label>:41                                      ; preds = %25
  %42 = insertelement <1 x float> undef, float %27, i32 0
  %43 = insertelement <1 x float> undef, float %26, i32 0
  %44 = shufflevector <1 x float> %43, <1 x float> %42, <1 x i32> <i32 1>
  %45 = extractelement <1 x float> %44, i32 0
  %46 = shufflevector <4 x float> %28, <4 x float> %28, <1 x i32> <i32 2>
  %47 = extractelement <1 x float> %46, i32 0
  %48 = insertelement <1 x float> undef, float %47, i32 0
  %49 = shufflevector <1 x float> %42, <1 x float> %48, <1 x i32> <i32 1>
  %50 = extractelement <1 x float> %49, i32 0
  %51 = insertelement <1 x float> undef, float %45, i32 0
  %52 = shufflevector <1 x float> %51, <1 x float> %51, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %53 = shufflevector <4 x float> %28, <4 x float> %52, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  %54 = add i32 %29, 1
  %55 = insertelement <1 x i32> undef, i32 %54, i32 0
  %56 = insertelement <1 x i32> undef, i32 %29, i32 0
  %57 = shufflevector <1 x i32> %56, <1 x i32> %55, <1 x i32> <i32 1>
  %58 = extractelement <1 x i32> %57, i32 0
  br label %25
}

declare float @llvm.AMDGPU.load.const(i32) readnone

declare float @llvm.R600.load.input(i32) readnone

declare void @llvm.AMDGPU.store.output(float, i32)

declare void @llvm.AMDGPU.reserve.reg(i32)

declare float @llvm.R600.load.input.perspective(i32) readnone

declare float @llvm.R600.load.input.constant(i32) readnone

declare float @llvm.R600.load.input.linear(i32) readnone

_________________________________________________________________
The print-before-all dump is:

*** IR Dump Before Preliminary module verification ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.AMDGPU.load.const(i32 0)
  %2 = bitcast float %1 to i32
  %3 = call float @llvm.AMDGPU.load.const(i32 4)
  %4 = insertelement <4 x float> undef, float %3, i32 0
  %5 = call float @llvm.AMDGPU.load.const(i32 5)
  %6 = insertelement <4 x float> %4, float %5, i32 1
  %7 = call float @llvm.AMDGPU.load.const(i32 6)
  %8 = insertelement <4 x float> %6, float %7, i32 2
  %9 = call float @llvm.AMDGPU.load.const(i32 7)
  %10 = insertelement <4 x float> %8, float %9, i32 3
  %11 = call float @llvm.R600.load.input(i32 4)
  %12 = insertelement <4 x float> undef, float %11, i32 0
  %13 = call float @llvm.R600.load.input(i32 5)
  %14 = insertelement <4 x float> %12, float %13, i32 1
  %15 = call float @llvm.R600.load.input(i32 6)
  %16 = insertelement <4 x float> %14, float %15, i32 2
  %17 = call float @llvm.R600.load.input(i32 7)
  %18 = insertelement <4 x float> %16, float %17, i32 3
  %19 = shufflevector <4 x float> %10, <4 x float> %10, <1 x i32> <i32 1>
  %20 = extractelement <1 x float> %19, i32 0
  %21 = insertelement <1 x float> undef, float %20, i32 0
  %22 = shufflevector <1 x float> undef, <1 x float> %21, <1 x i32> <i32 1>
  %23 = extractelement <1 x float> %22, i32 0
  %24 = shufflevector <4 x float> undef, <4 x float> %10, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  br label %25

; <label>:25                                      ; preds = %41, %0
  %26 = phi float [ %45, %41 ], [ undef, %0 ]
  %27 = phi float [ %50, %41 ], [ %23, %0 ]
  %28 = phi <4 x float> [ %53, %41 ], [ %24, %0 ]
  %29 = phi i32 [ %58, %41 ], [ 0, %0 ]
  %30 = icmp sge i32 %29, %2
  br i1 %30, label %31, label %41

; <label>:31                                      ; preds = %25
  %32 = shufflevector <4 x float> undef, <4 x float> %18, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %33 = extractelement <4 x float> %28, i32 0
  call void @llvm.AMDGPU.store.output(float %33, i32 8)
  %34 = extractelement <4 x float> %28, i32 1
  call void @llvm.AMDGPU.store.output(float %34, i32 9)
  %35 = extractelement <4 x float> %28, i32 2
  call void @llvm.AMDGPU.store.output(float %35, i32 10)
  %36 = extractelement <4 x float> %28, i32 3
  call void @llvm.AMDGPU.store.output(float %36, i32 11)
  %37 = extractelement <4 x float> %32, i32 0
  call void @llvm.AMDGPU.store.output(float %37, i32 4)
  %38 = extractelement <4 x float> %32, i32 1
  call void @llvm.AMDGPU.store.output(float %38, i32 5)
  %39 = extractelement <4 x float> %32, i32 2
  call void @llvm.AMDGPU.store.output(float %39, i32 6)
  %40 = extractelement <4 x float> %32, i32 3
  call void @llvm.AMDGPU.store.output(float %40, i32 7)
  ret void

; <label>:41                                      ; preds = %25
  %42 = insertelement <1 x float> undef, float %27, i32 0
  %43 = insertelement <1 x float> undef, float %26, i32 0
  %44 = shufflevector <1 x float> %43, <1 x float> %42, <1 x i32> <i32 1>
  %45 = extractelement <1 x float> %44, i32 0
  %46 = shufflevector <4 x float> %28, <4 x float> %28, <1 x i32> <i32 2>
  %47 = extractelement <1 x float> %46, i32 0
  %48 = insertelement <1 x float> undef, float %47, i32 0
  %49 = shufflevector <1 x float> %42, <1 x float> %48, <1 x i32> <i32 1>
  %50 = extractelement <1 x float> %49, i32 0
  %51 = insertelement <1 x float> undef, float %45, i32 0
  %52 = shufflevector <1 x float> %51, <1 x float> %51, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %53 = shufflevector <4 x float> %28, <4 x float> %52, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  %54 = add i32 %29, 1
  %55 = insertelement <1 x i32> undef, i32 %54, i32 0
  %56 = insertelement <1 x i32> undef, i32 %29, i32 0
  %57 = shufflevector <1 x i32> %56, <1 x i32> %55, <1 x i32> <i32 1>
  %58 = extractelement <1 x i32> %57, i32 0
  br label %25
}
*** IR Dump Before Module Verifier ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.AMDGPU.load.const(i32 0)
  %2 = bitcast float %1 to i32
  %3 = call float @llvm.AMDGPU.load.const(i32 4)
  %4 = insertelement <4 x float> undef, float %3, i32 0
  %5 = call float @llvm.AMDGPU.load.const(i32 5)
  %6 = insertelement <4 x float> %4, float %5, i32 1
  %7 = call float @llvm.AMDGPU.load.const(i32 6)
  %8 = insertelement <4 x float> %6, float %7, i32 2
  %9 = call float @llvm.AMDGPU.load.const(i32 7)
  %10 = insertelement <4 x float> %8, float %9, i32 3
  %11 = call float @llvm.R600.load.input(i32 4)
  %12 = insertelement <4 x float> undef, float %11, i32 0
  %13 = call float @llvm.R600.load.input(i32 5)
  %14 = insertelement <4 x float> %12, float %13, i32 1
  %15 = call float @llvm.R600.load.input(i32 6)
  %16 = insertelement <4 x float> %14, float %15, i32 2
  %17 = call float @llvm.R600.load.input(i32 7)
  %18 = insertelement <4 x float> %16, float %17, i32 3
  %19 = shufflevector <4 x float> %10, <4 x float> %10, <1 x i32> <i32 1>
  %20 = extractelement <1 x float> %19, i32 0
  %21 = insertelement <1 x float> undef, float %20, i32 0
  %22 = shufflevector <1 x float> undef, <1 x float> %21, <1 x i32> <i32 1>
  %23 = extractelement <1 x float> %22, i32 0
  %24 = shufflevector <4 x float> undef, <4 x float> %10, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  br label %25

; <label>:25                                      ; preds = %41, %0
  %26 = phi float [ %45, %41 ], [ undef, %0 ]
  %27 = phi float [ %50, %41 ], [ %23, %0 ]
  %28 = phi <4 x float> [ %53, %41 ], [ %24, %0 ]
  %29 = phi i32 [ %58, %41 ], [ 0, %0 ]
  %30 = icmp sge i32 %29, %2
  br i1 %30, label %31, label %41

; <label>:31                                      ; preds = %25
  %32 = shufflevector <4 x float> undef, <4 x float> %18, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %33 = extractelement <4 x float> %28, i32 0
  call void @llvm.AMDGPU.store.output(float %33, i32 8)
  %34 = extractelement <4 x float> %28, i32 1
  call void @llvm.AMDGPU.store.output(float %34, i32 9)
  %35 = extractelement <4 x float> %28, i32 2
  call void @llvm.AMDGPU.store.output(float %35, i32 10)
  %36 = extractelement <4 x float> %28, i32 3
  call void @llvm.AMDGPU.store.output(float %36, i32 11)
  %37 = extractelement <4 x float> %32, i32 0
  call void @llvm.AMDGPU.store.output(float %37, i32 4)
  %38 = extractelement <4 x float> %32, i32 1
  call void @llvm.AMDGPU.store.output(float %38, i32 5)
  %39 = extractelement <4 x float> %32, i32 2
  call void @llvm.AMDGPU.store.output(float %39, i32 6)
  %40 = extractelement <4 x float> %32, i32 3
  call void @llvm.AMDGPU.store.output(float %40, i32 7)
  ret void

; <label>:41                                      ; preds = %25
  %42 = insertelement <1 x float> undef, float %27, i32 0
  %43 = insertelement <1 x float> undef, float %26, i32 0
  %44 = shufflevector <1 x float> %43, <1 x float> %42, <1 x i32> <i32 1>
  %45 = extractelement <1 x float> %44, i32 0
  %46 = shufflevector <4 x float> %28, <4 x float> %28, <1 x i32> <i32 2>
  %47 = extractelement <1 x float> %46, i32 0
  %48 = insertelement <1 x float> undef, float %47, i32 0
  %49 = shufflevector <1 x float> %42, <1 x float> %48, <1 x i32> <i32 1>
  %50 = extractelement <1 x float> %49, i32 0
  %51 = insertelement <1 x float> undef, float %45, i32 0
  %52 = shufflevector <1 x float> %51, <1 x float> %51, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %53 = shufflevector <4 x float> %28, <4 x float> %52, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  %54 = add i32 %29, 1
  %55 = insertelement <1 x i32> undef, i32 %54, i32 0
  %56 = insertelement <1 x i32> undef, i32 %29, i32 0
  %57 = shufflevector <1 x i32> %56, <1 x i32> %55, <1 x i32> <i32 1>
  %58 = extractelement <1 x i32> %57, i32 0
  br label %25
}
*** IR Dump Before Canonicalize natural loops ***
; <label>:25                                      ; preds = %41, %0
  %26 = phi float [ %45, %41 ], [ undef, %0 ]
  %27 = phi float [ %50, %41 ], [ %23, %0 ]
  %28 = phi <4 x float> [ %53, %41 ], [ %24, %0 ]
  %29 = phi i32 [ %58, %41 ], [ 0, %0 ]
  %30 = icmp sge i32 %29, %2
  br i1 %30, label %31, label %41

; <label>:41                                      ; preds = %25
  %42 = insertelement <1 x float> undef, float %27, i32 0
  %43 = insertelement <1 x float> undef, float %26, i32 0
  %44 = shufflevector <1 x float> %43, <1 x float> %42, <1 x i32> <i32 1>
  %45 = extractelement <1 x float> %44, i32 0
  %46 = shufflevector <4 x float> %28, <4 x float> %28, <1 x i32> <i32 2>
  %47 = extractelement <1 x float> %46, i32 0
  %48 = insertelement <1 x float> undef, float %47, i32 0
  %49 = shufflevector <1 x float> %42, <1 x float> %48, <1 x i32> <i32 1>
  %50 = extractelement <1 x float> %49, i32 0
  %51 = insertelement <1 x float> undef, float %45, i32 0
  %52 = shufflevector <1 x float> %51, <1 x float> %51, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %53 = shufflevector <4 x float> %28, <4 x float> %52, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  %54 = add i32 %29, 1
  %55 = insertelement <1 x i32> undef, i32 %54, i32 0
  %56 = insertelement <1 x i32> undef, i32 %29, i32 0
  %57 = shufflevector <1 x i32> %56, <1 x i32> %55, <1 x i32> <i32 1>
  %58 = extractelement <1 x i32> %57, i32 0
  br label %25
*** IR Dump Before Canonicalize natural loops ***
; <label>:25                                      ; preds = %41, %0
  %26 = phi float [ %45, %41 ], [ undef, %0 ]
  %27 = phi float [ %50, %41 ], [ %23, %0 ]
  %28 = phi <4 x float> [ %53, %41 ], [ %24, %0 ]
  %29 = phi i32 [ %58, %41 ], [ 0, %0 ]
  %30 = icmp sge i32 %29, %2
  br i1 %30, label %31, label %41

; <label>:41                                      ; preds = %25
  %42 = insertelement <1 x float> undef, float %27, i32 0
  %43 = insertelement <1 x float> undef, float %26, i32 0
  %44 = shufflevector <1 x float> %43, <1 x float> %42, <1 x i32> <i32 1>
  %45 = extractelement <1 x float> %44, i32 0
  %46 = shufflevector <4 x float> %28, <4 x float> %28, <1 x i32> <i32 2>
  %47 = extractelement <1 x float> %46, i32 0
  %48 = insertelement <1 x float> undef, float %47, i32 0
  %49 = shufflevector <1 x float> %42, <1 x float> %48, <1 x i32> <i32 1>
  %50 = extractelement <1 x float> %49, i32 0
  %51 = insertelement <1 x float> undef, float %45, i32 0
  %52 = shufflevector <1 x float> %51, <1 x float> %51, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %53 = shufflevector <4 x float> %28, <4 x float> %52, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  %54 = add i32 %29, 1
  %55 = insertelement <1 x i32> undef, i32 %54, i32 0
  %56 = insertelement <1 x i32> undef, i32 %29, i32 0
  %57 = shufflevector <1 x i32> %56, <1 x i32> %55, <1 x i32> <i32 1>
  %58 = extractelement <1 x i32> %57, i32 0
  br label %25
*** IR Dump Before Loop Strength Reduction ***
; <label>:25                                      ; preds = %41, %0
  %26 = phi float [ %45, %41 ], [ undef, %0 ]
  %27 = phi float [ %50, %41 ], [ %23, %0 ]
  %28 = phi <4 x float> [ %53, %41 ], [ %24, %0 ]
  %29 = phi i32 [ %58, %41 ], [ 0, %0 ]
  %30 = icmp sge i32 %29, %2
  br i1 %30, label %31, label %41

; <label>:41                                      ; preds = %25
  %42 = insertelement <1 x float> undef, float %27, i32 0
  %43 = insertelement <1 x float> undef, float %26, i32 0
  %44 = shufflevector <1 x float> %43, <1 x float> %42, <1 x i32> <i32 1>
  %45 = extractelement <1 x float> %44, i32 0
  %46 = shufflevector <4 x float> %28, <4 x float> %28, <1 x i32> <i32 2>
  %47 = extractelement <1 x float> %46, i32 0
  %48 = insertelement <1 x float> undef, float %47, i32 0
  %49 = shufflevector <1 x float> %42, <1 x float> %48, <1 x i32> <i32 1>
  %50 = extractelement <1 x float> %49, i32 0
  %51 = insertelement <1 x float> undef, float %45, i32 0
  %52 = shufflevector <1 x float> %51, <1 x float> %51, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %53 = shufflevector <4 x float> %28, <4 x float> %52, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  %54 = add i32 %29, 1
  %55 = insertelement <1 x i32> undef, i32 %54, i32 0
  %56 = insertelement <1 x i32> undef, i32 %29, i32 0
  %57 = shufflevector <1 x i32> %56, <1 x i32> %55, <1 x i32> <i32 1>
  %58 = extractelement <1 x i32> %57, i32 0
  br label %25
*** IR Dump Before Lower Garbage Collection Instructions ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.AMDGPU.load.const(i32 0)
  %2 = bitcast float %1 to i32
  %3 = call float @llvm.AMDGPU.load.const(i32 4)
  %4 = insertelement <4 x float> undef, float %3, i32 0
  %5 = call float @llvm.AMDGPU.load.const(i32 5)
  %6 = insertelement <4 x float> %4, float %5, i32 1
  %7 = call float @llvm.AMDGPU.load.const(i32 6)
  %8 = insertelement <4 x float> %6, float %7, i32 2
  %9 = call float @llvm.AMDGPU.load.const(i32 7)
  %10 = insertelement <4 x float> %8, float %9, i32 3
  %11 = call float @llvm.R600.load.input(i32 4)
  %12 = insertelement <4 x float> undef, float %11, i32 0
  %13 = call float @llvm.R600.load.input(i32 5)
  %14 = insertelement <4 x float> %12, float %13, i32 1
  %15 = call float @llvm.R600.load.input(i32 6)
  %16 = insertelement <4 x float> %14, float %15, i32 2
  %17 = call float @llvm.R600.load.input(i32 7)
  %18 = insertelement <4 x float> %16, float %17, i32 3
  %19 = shufflevector <4 x float> %10, <4 x float> %10, <1 x i32> <i32 1>
  %20 = extractelement <1 x float> %19, i32 0
  %21 = insertelement <1 x float> undef, float %20, i32 0
  %22 = shufflevector <1 x float> undef, <1 x float> %21, <1 x i32> <i32 1>
  %23 = extractelement <1 x float> %22, i32 0
  %24 = shufflevector <4 x float> undef, <4 x float> %10, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  br label %25

; <label>:25                                      ; preds = %41, %0
  %26 = phi float [ %45, %41 ], [ undef, %0 ]
  %27 = phi float [ %50, %41 ], [ %23, %0 ]
  %28 = phi <4 x float> [ %53, %41 ], [ %24, %0 ]
  %29 = phi i32 [ %58, %41 ], [ 0, %0 ]
  %30 = icmp sge i32 %29, %2
  br i1 %30, label %31, label %41

; <label>:31                                      ; preds = %25
  %32 = shufflevector <4 x float> undef, <4 x float> %18, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %33 = extractelement <4 x float> %28, i32 0
  call void @llvm.AMDGPU.store.output(float %33, i32 8)
  %34 = extractelement <4 x float> %28, i32 1
  call void @llvm.AMDGPU.store.output(float %34, i32 9)
  %35 = extractelement <4 x float> %28, i32 2
  call void @llvm.AMDGPU.store.output(float %35, i32 10)
  %36 = extractelement <4 x float> %28, i32 3
  call void @llvm.AMDGPU.store.output(float %36, i32 11)
  %37 = extractelement <4 x float> %32, i32 0
  call void @llvm.AMDGPU.store.output(float %37, i32 4)
  %38 = extractelement <4 x float> %32, i32 1
  call void @llvm.AMDGPU.store.output(float %38, i32 5)
  %39 = extractelement <4 x float> %32, i32 2
  call void @llvm.AMDGPU.store.output(float %39, i32 6)
  %40 = extractelement <4 x float> %32, i32 3
  call void @llvm.AMDGPU.store.output(float %40, i32 7)
  ret void

; <label>:41                                      ; preds = %25
  %42 = insertelement <1 x float> undef, float %27, i32 0
  %43 = insertelement <1 x float> undef, float %26, i32 0
  %44 = shufflevector <1 x float> %43, <1 x float> %42, <1 x i32> <i32 1>
  %45 = extractelement <1 x float> %44, i32 0
  %46 = shufflevector <4 x float> %28, <4 x float> %28, <1 x i32> <i32 2>
  %47 = extractelement <1 x float> %46, i32 0
  %48 = insertelement <1 x float> undef, float %47, i32 0
  %49 = shufflevector <1 x float> %42, <1 x float> %48, <1 x i32> <i32 1>
  %50 = extractelement <1 x float> %49, i32 0
  %51 = insertelement <1 x float> undef, float %45, i32 0
  %52 = shufflevector <1 x float> %51, <1 x float> %51, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %53 = shufflevector <4 x float> %28, <4 x float> %52, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  %54 = add i32 %29, 1
  %55 = insertelement <1 x i32> undef, i32 %54, i32 0
  %56 = insertelement <1 x i32> undef, i32 %29, i32 0
  %57 = shufflevector <1 x i32> %56, <1 x i32> %55, <1 x i32> <i32 1>
  %58 = extractelement <1 x i32> %57, i32 0
  br label %25
}
*** IR Dump Before Remove unreachable blocks from the CFG ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.AMDGPU.load.const(i32 0)
  %2 = bitcast float %1 to i32
  %3 = call float @llvm.AMDGPU.load.const(i32 4)
  %4 = insertelement <4 x float> undef, float %3, i32 0
  %5 = call float @llvm.AMDGPU.load.const(i32 5)
  %6 = insertelement <4 x float> %4, float %5, i32 1
  %7 = call float @llvm.AMDGPU.load.const(i32 6)
  %8 = insertelement <4 x float> %6, float %7, i32 2
  %9 = call float @llvm.AMDGPU.load.const(i32 7)
  %10 = insertelement <4 x float> %8, float %9, i32 3
  %11 = call float @llvm.R600.load.input(i32 4)
  %12 = insertelement <4 x float> undef, float %11, i32 0
  %13 = call float @llvm.R600.load.input(i32 5)
  %14 = insertelement <4 x float> %12, float %13, i32 1
  %15 = call float @llvm.R600.load.input(i32 6)
  %16 = insertelement <4 x float> %14, float %15, i32 2
  %17 = call float @llvm.R600.load.input(i32 7)
  %18 = insertelement <4 x float> %16, float %17, i32 3
  %19 = shufflevector <4 x float> %10, <4 x float> %10, <1 x i32> <i32 1>
  %20 = extractelement <1 x float> %19, i32 0
  %21 = insertelement <1 x float> undef, float %20, i32 0
  %22 = shufflevector <1 x float> undef, <1 x float> %21, <1 x i32> <i32 1>
  %23 = extractelement <1 x float> %22, i32 0
  %24 = shufflevector <4 x float> undef, <4 x float> %10, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  br label %25

; <label>:25                                      ; preds = %41, %0
  %26 = phi float [ %45, %41 ], [ undef, %0 ]
  %27 = phi float [ %50, %41 ], [ %23, %0 ]
  %28 = phi <4 x float> [ %53, %41 ], [ %24, %0 ]
  %29 = phi i32 [ %58, %41 ], [ 0, %0 ]
  %30 = icmp sge i32 %29, %2
  br i1 %30, label %31, label %41

; <label>:31                                      ; preds = %25
  %32 = shufflevector <4 x float> undef, <4 x float> %18, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %33 = extractelement <4 x float> %28, i32 0
  call void @llvm.AMDGPU.store.output(float %33, i32 8)
  %34 = extractelement <4 x float> %28, i32 1
  call void @llvm.AMDGPU.store.output(float %34, i32 9)
  %35 = extractelement <4 x float> %28, i32 2
  call void @llvm.AMDGPU.store.output(float %35, i32 10)
  %36 = extractelement <4 x float> %28, i32 3
  call void @llvm.AMDGPU.store.output(float %36, i32 11)
  %37 = extractelement <4 x float> %32, i32 0
  call void @llvm.AMDGPU.store.output(float %37, i32 4)
  %38 = extractelement <4 x float> %32, i32 1
  call void @llvm.AMDGPU.store.output(float %38, i32 5)
  %39 = extractelement <4 x float> %32, i32 2
  call void @llvm.AMDGPU.store.output(float %39, i32 6)
  %40 = extractelement <4 x float> %32, i32 3
  call void @llvm.AMDGPU.store.output(float %40, i32 7)
  ret void

; <label>:41                                      ; preds = %25
  %42 = insertelement <1 x float> undef, float %27, i32 0
  %43 = insertelement <1 x float> undef, float %26, i32 0
  %44 = shufflevector <1 x float> %43, <1 x float> %42, <1 x i32> <i32 1>
  %45 = extractelement <1 x float> %44, i32 0
  %46 = shufflevector <4 x float> %28, <4 x float> %28, <1 x i32> <i32 2>
  %47 = extractelement <1 x float> %46, i32 0
  %48 = insertelement <1 x float> undef, float %47, i32 0
  %49 = shufflevector <1 x float> %42, <1 x float> %48, <1 x i32> <i32 1>
  %50 = extractelement <1 x float> %49, i32 0
  %51 = insertelement <1 x float> undef, float %45, i32 0
  %52 = shufflevector <1 x float> %51, <1 x float> %51, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %53 = shufflevector <4 x float> %28, <4 x float> %52, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  %54 = add i32 %29, 1
  %55 = insertelement <1 x i32> undef, i32 %54, i32 0
  %56 = insertelement <1 x i32> undef, i32 %29, i32 0
  %57 = shufflevector <1 x i32> %56, <1 x i32> %55, <1 x i32> <i32 1>
  %58 = extractelement <1 x i32> %57, i32 0
  br label %25
}
*** IR Dump Before Lower invoke and unwind, for unwindless code generators ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.AMDGPU.load.const(i32 0)
  %2 = bitcast float %1 to i32
  %3 = call float @llvm.AMDGPU.load.const(i32 4)
  %4 = insertelement <4 x float> undef, float %3, i32 0
  %5 = call float @llvm.AMDGPU.load.const(i32 5)
  %6 = insertelement <4 x float> %4, float %5, i32 1
  %7 = call float @llvm.AMDGPU.load.const(i32 6)
  %8 = insertelement <4 x float> %6, float %7, i32 2
  %9 = call float @llvm.AMDGPU.load.const(i32 7)
  %10 = insertelement <4 x float> %8, float %9, i32 3
  %11 = call float @llvm.R600.load.input(i32 4)
  %12 = insertelement <4 x float> undef, float %11, i32 0
  %13 = call float @llvm.R600.load.input(i32 5)
  %14 = insertelement <4 x float> %12, float %13, i32 1
  %15 = call float @llvm.R600.load.input(i32 6)
  %16 = insertelement <4 x float> %14, float %15, i32 2
  %17 = call float @llvm.R600.load.input(i32 7)
  %18 = insertelement <4 x float> %16, float %17, i32 3
  %19 = shufflevector <4 x float> %10, <4 x float> %10, <1 x i32> <i32 1>
  %20 = extractelement <1 x float> %19, i32 0
  %21 = insertelement <1 x float> undef, float %20, i32 0
  %22 = shufflevector <1 x float> undef, <1 x float> %21, <1 x i32> <i32 1>
  %23 = extractelement <1 x float> %22, i32 0
  %24 = shufflevector <4 x float> undef, <4 x float> %10, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  br label %25

; <label>:25                                      ; preds = %41, %0
  %26 = phi float [ %45, %41 ], [ undef, %0 ]
  %27 = phi float [ %50, %41 ], [ %23, %0 ]
  %28 = phi <4 x float> [ %53, %41 ], [ %24, %0 ]
  %29 = phi i32 [ %58, %41 ], [ 0, %0 ]
  %30 = icmp sge i32 %29, %2
  br i1 %30, label %31, label %41

; <label>:31                                      ; preds = %25
  %32 = shufflevector <4 x float> undef, <4 x float> %18, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %33 = extractelement <4 x float> %28, i32 0
  call void @llvm.AMDGPU.store.output(float %33, i32 8)
  %34 = extractelement <4 x float> %28, i32 1
  call void @llvm.AMDGPU.store.output(float %34, i32 9)
  %35 = extractelement <4 x float> %28, i32 2
  call void @llvm.AMDGPU.store.output(float %35, i32 10)
  %36 = extractelement <4 x float> %28, i32 3
  call void @llvm.AMDGPU.store.output(float %36, i32 11)
  %37 = extractelement <4 x float> %32, i32 0
  call void @llvm.AMDGPU.store.output(float %37, i32 4)
  %38 = extractelement <4 x float> %32, i32 1
  call void @llvm.AMDGPU.store.output(float %38, i32 5)
  %39 = extractelement <4 x float> %32, i32 2
  call void @llvm.AMDGPU.store.output(float %39, i32 6)
  %40 = extractelement <4 x float> %32, i32 3
  call void @llvm.AMDGPU.store.output(float %40, i32 7)
  ret void

; <label>:41                                      ; preds = %25
  %42 = insertelement <1 x float> undef, float %27, i32 0
  %43 = insertelement <1 x float> undef, float %26, i32 0
  %44 = shufflevector <1 x float> %43, <1 x float> %42, <1 x i32> <i32 1>
  %45 = extractelement <1 x float> %44, i32 0
  %46 = shufflevector <4 x float> %28, <4 x float> %28, <1 x i32> <i32 2>
  %47 = extractelement <1 x float> %46, i32 0
  %48 = insertelement <1 x float> undef, float %47, i32 0
  %49 = shufflevector <1 x float> %42, <1 x float> %48, <1 x i32> <i32 1>
  %50 = extractelement <1 x float> %49, i32 0
  %51 = insertelement <1 x float> undef, float %45, i32 0
  %52 = shufflevector <1 x float> %51, <1 x float> %51, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %53 = shufflevector <4 x float> %28, <4 x float> %52, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  %54 = add i32 %29, 1
  %55 = insertelement <1 x i32> undef, i32 %54, i32 0
  %56 = insertelement <1 x i32> undef, i32 %29, i32 0
  %57 = shufflevector <1 x i32> %56, <1 x i32> %55, <1 x i32> <i32 1>
  %58 = extractelement <1 x i32> %57, i32 0
  br label %25
}
*** IR Dump Before Remove unreachable blocks from the CFG ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.AMDGPU.load.const(i32 0)
  %2 = bitcast float %1 to i32
  %3 = call float @llvm.AMDGPU.load.const(i32 4)
  %4 = insertelement <4 x float> undef, float %3, i32 0
  %5 = call float @llvm.AMDGPU.load.const(i32 5)
  %6 = insertelement <4 x float> %4, float %5, i32 1
  %7 = call float @llvm.AMDGPU.load.const(i32 6)
  %8 = insertelement <4 x float> %6, float %7, i32 2
  %9 = call float @llvm.AMDGPU.load.const(i32 7)
  %10 = insertelement <4 x float> %8, float %9, i32 3
  %11 = call float @llvm.R600.load.input(i32 4)
  %12 = insertelement <4 x float> undef, float %11, i32 0
  %13 = call float @llvm.R600.load.input(i32 5)
  %14 = insertelement <4 x float> %12, float %13, i32 1
  %15 = call float @llvm.R600.load.input(i32 6)
  %16 = insertelement <4 x float> %14, float %15, i32 2
  %17 = call float @llvm.R600.load.input(i32 7)
  %18 = insertelement <4 x float> %16, float %17, i32 3
  %19 = shufflevector <4 x float> %10, <4 x float> %10, <1 x i32> <i32 1>
  %20 = extractelement <1 x float> %19, i32 0
  %21 = insertelement <1 x float> undef, float %20, i32 0
  %22 = shufflevector <1 x float> undef, <1 x float> %21, <1 x i32> <i32 1>
  %23 = extractelement <1 x float> %22, i32 0
  %24 = shufflevector <4 x float> undef, <4 x float> %10, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  br label %25

; <label>:25                                      ; preds = %41, %0
  %26 = phi float [ %45, %41 ], [ undef, %0 ]
  %27 = phi float [ %50, %41 ], [ %23, %0 ]
  %28 = phi <4 x float> [ %53, %41 ], [ %24, %0 ]
  %29 = phi i32 [ %58, %41 ], [ 0, %0 ]
  %30 = icmp sge i32 %29, %2
  br i1 %30, label %31, label %41

; <label>:31                                      ; preds = %25
  %32 = shufflevector <4 x float> undef, <4 x float> %18, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %33 = extractelement <4 x float> %28, i32 0
  call void @llvm.AMDGPU.store.output(float %33, i32 8)
  %34 = extractelement <4 x float> %28, i32 1
  call void @llvm.AMDGPU.store.output(float %34, i32 9)
  %35 = extractelement <4 x float> %28, i32 2
  call void @llvm.AMDGPU.store.output(float %35, i32 10)
  %36 = extractelement <4 x float> %28, i32 3
  call void @llvm.AMDGPU.store.output(float %36, i32 11)
  %37 = extractelement <4 x float> %32, i32 0
  call void @llvm.AMDGPU.store.output(float %37, i32 4)
  %38 = extractelement <4 x float> %32, i32 1
  call void @llvm.AMDGPU.store.output(float %38, i32 5)
  %39 = extractelement <4 x float> %32, i32 2
  call void @llvm.AMDGPU.store.output(float %39, i32 6)
  %40 = extractelement <4 x float> %32, i32 3
  call void @llvm.AMDGPU.store.output(float %40, i32 7)
  ret void

; <label>:41                                      ; preds = %25
  %42 = insertelement <1 x float> undef, float %27, i32 0
  %43 = insertelement <1 x float> undef, float %26, i32 0
  %44 = shufflevector <1 x float> %43, <1 x float> %42, <1 x i32> <i32 1>
  %45 = extractelement <1 x float> %44, i32 0
  %46 = shufflevector <4 x float> %28, <4 x float> %28, <1 x i32> <i32 2>
  %47 = extractelement <1 x float> %46, i32 0
  %48 = insertelement <1 x float> undef, float %47, i32 0
  %49 = shufflevector <1 x float> %42, <1 x float> %48, <1 x i32> <i32 1>
  %50 = extractelement <1 x float> %49, i32 0
  %51 = insertelement <1 x float> undef, float %45, i32 0
  %52 = shufflevector <1 x float> %51, <1 x float> %51, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %53 = shufflevector <4 x float> %28, <4 x float> %52, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  %54 = add i32 %29, 1
  %55 = insertelement <1 x i32> undef, i32 %54, i32 0
  %56 = insertelement <1 x i32> undef, i32 %29, i32 0
  %57 = shufflevector <1 x i32> %56, <1 x i32> %55, <1 x i32> <i32 1>
  %58 = extractelement <1 x i32> %57, i32 0
  br label %25
}
*** IR Dump Before Optimize for code generation ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.AMDGPU.load.const(i32 0)
  %2 = bitcast float %1 to i32
  %3 = call float @llvm.AMDGPU.load.const(i32 4)
  %4 = insertelement <4 x float> undef, float %3, i32 0
  %5 = call float @llvm.AMDGPU.load.const(i32 5)
  %6 = insertelement <4 x float> %4, float %5, i32 1
  %7 = call float @llvm.AMDGPU.load.const(i32 6)
  %8 = insertelement <4 x float> %6, float %7, i32 2
  %9 = call float @llvm.AMDGPU.load.const(i32 7)
  %10 = insertelement <4 x float> %8, float %9, i32 3
  %11 = call float @llvm.R600.load.input(i32 4)
  %12 = insertelement <4 x float> undef, float %11, i32 0
  %13 = call float @llvm.R600.load.input(i32 5)
  %14 = insertelement <4 x float> %12, float %13, i32 1
  %15 = call float @llvm.R600.load.input(i32 6)
  %16 = insertelement <4 x float> %14, float %15, i32 2
  %17 = call float @llvm.R600.load.input(i32 7)
  %18 = insertelement <4 x float> %16, float %17, i32 3
  %19 = shufflevector <4 x float> %10, <4 x float> %10, <1 x i32> <i32 1>
  %20 = extractelement <1 x float> %19, i32 0
  %21 = insertelement <1 x float> undef, float %20, i32 0
  %22 = shufflevector <1 x float> undef, <1 x float> %21, <1 x i32> <i32 1>
  %23 = extractelement <1 x float> %22, i32 0
  %24 = shufflevector <4 x float> undef, <4 x float> %10, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  br label %25

; <label>:25                                      ; preds = %41, %0
  %26 = phi float [ %45, %41 ], [ undef, %0 ]
  %27 = phi float [ %50, %41 ], [ %23, %0 ]
  %28 = phi <4 x float> [ %53, %41 ], [ %24, %0 ]
  %29 = phi i32 [ %58, %41 ], [ 0, %0 ]
  %30 = icmp sge i32 %29, %2
  br i1 %30, label %31, label %41

; <label>:31                                      ; preds = %25
  %32 = shufflevector <4 x float> undef, <4 x float> %18, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %33 = extractelement <4 x float> %28, i32 0
  call void @llvm.AMDGPU.store.output(float %33, i32 8)
  %34 = extractelement <4 x float> %28, i32 1
  call void @llvm.AMDGPU.store.output(float %34, i32 9)
  %35 = extractelement <4 x float> %28, i32 2
  call void @llvm.AMDGPU.store.output(float %35, i32 10)
  %36 = extractelement <4 x float> %28, i32 3
  call void @llvm.AMDGPU.store.output(float %36, i32 11)
  %37 = extractelement <4 x float> %32, i32 0
  call void @llvm.AMDGPU.store.output(float %37, i32 4)
  %38 = extractelement <4 x float> %32, i32 1
  call void @llvm.AMDGPU.store.output(float %38, i32 5)
  %39 = extractelement <4 x float> %32, i32 2
  call void @llvm.AMDGPU.store.output(float %39, i32 6)
  %40 = extractelement <4 x float> %32, i32 3
  call void @llvm.AMDGPU.store.output(float %40, i32 7)
  ret void

; <label>:41                                      ; preds = %25
  %42 = insertelement <1 x float> undef, float %27, i32 0
  %43 = insertelement <1 x float> undef, float %26, i32 0
  %44 = shufflevector <1 x float> %43, <1 x float> %42, <1 x i32> <i32 1>
  %45 = extractelement <1 x float> %44, i32 0
  %46 = shufflevector <4 x float> %28, <4 x float> %28, <1 x i32> <i32 2>
  %47 = extractelement <1 x float> %46, i32 0
  %48 = insertelement <1 x float> undef, float %47, i32 0
  %49 = shufflevector <1 x float> %42, <1 x float> %48, <1 x i32> <i32 1>
  %50 = extractelement <1 x float> %49, i32 0
  %51 = insertelement <1 x float> undef, float %45, i32 0
  %52 = shufflevector <1 x float> %51, <1 x float> %51, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %53 = shufflevector <4 x float> %28, <4 x float> %52, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  %54 = add i32 %29, 1
  %55 = insertelement <1 x i32> undef, i32 %54, i32 0
  %56 = insertelement <1 x i32> undef, i32 %29, i32 0
  %57 = shufflevector <1 x i32> %56, <1 x i32> %55, <1 x i32> <i32 1>
  %58 = extractelement <1 x i32> %57, i32 0
  br label %25
}
*** IR Dump Before Insert stack protectors ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.AMDGPU.load.const(i32 0)
  %2 = bitcast float %1 to i32
  %3 = call float @llvm.AMDGPU.load.const(i32 4)
  %4 = insertelement <4 x float> undef, float %3, i32 0
  %5 = call float @llvm.AMDGPU.load.const(i32 5)
  %6 = insertelement <4 x float> %4, float %5, i32 1
  %7 = call float @llvm.AMDGPU.load.const(i32 6)
  %8 = insertelement <4 x float> %6, float %7, i32 2
  %9 = call float @llvm.AMDGPU.load.const(i32 7)
  %10 = insertelement <4 x float> %8, float %9, i32 3
  %11 = call float @llvm.R600.load.input(i32 4)
  %12 = insertelement <4 x float> undef, float %11, i32 0
  %13 = call float @llvm.R600.load.input(i32 5)
  %14 = insertelement <4 x float> %12, float %13, i32 1
  %15 = call float @llvm.R600.load.input(i32 6)
  %16 = insertelement <4 x float> %14, float %15, i32 2
  %17 = call float @llvm.R600.load.input(i32 7)
  %18 = insertelement <4 x float> %16, float %17, i32 3
  %19 = shufflevector <4 x float> %10, <4 x float> %10, <1 x i32> <i32 1>
  %20 = extractelement <1 x float> %19, i32 0
  %21 = insertelement <1 x float> undef, float %20, i32 0
  %22 = shufflevector <1 x float> undef, <1 x float> %21, <1 x i32> <i32 1>
  %23 = extractelement <1 x float> %22, i32 0
  %24 = shufflevector <4 x float> undef, <4 x float> %10, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  br label %25

; <label>:25                                      ; preds = %41, %0
  %26 = phi float [ %45, %41 ], [ undef, %0 ]
  %27 = phi float [ %50, %41 ], [ %23, %0 ]
  %28 = phi <4 x float> [ %53, %41 ], [ %24, %0 ]
  %29 = phi i32 [ %58, %41 ], [ 0, %0 ]
  %30 = icmp sge i32 %29, %2
  br i1 %30, label %31, label %41

; <label>:31                                      ; preds = %25
  %32 = shufflevector <4 x float> undef, <4 x float> %18, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %33 = extractelement <4 x float> %28, i32 0
  call void @llvm.AMDGPU.store.output(float %33, i32 8)
  %34 = extractelement <4 x float> %28, i32 1
  call void @llvm.AMDGPU.store.output(float %34, i32 9)
  %35 = extractelement <4 x float> %28, i32 2
  call void @llvm.AMDGPU.store.output(float %35, i32 10)
  %36 = extractelement <4 x float> %28, i32 3
  call void @llvm.AMDGPU.store.output(float %36, i32 11)
  %37 = extractelement <4 x float> %32, i32 0
  call void @llvm.AMDGPU.store.output(float %37, i32 4)
  %38 = extractelement <4 x float> %32, i32 1
  call void @llvm.AMDGPU.store.output(float %38, i32 5)
  %39 = extractelement <4 x float> %32, i32 2
  call void @llvm.AMDGPU.store.output(float %39, i32 6)
  %40 = extractelement <4 x float> %32, i32 3
  call void @llvm.AMDGPU.store.output(float %40, i32 7)
  ret void

; <label>:41                                      ; preds = %25
  %42 = insertelement <1 x float> undef, float %27, i32 0
  %43 = insertelement <1 x float> undef, float %26, i32 0
  %44 = shufflevector <1 x float> %43, <1 x float> %42, <1 x i32> <i32 1>
  %45 = extractelement <1 x float> %44, i32 0
  %46 = shufflevector <4 x float> %28, <4 x float> %28, <1 x i32> <i32 2>
  %47 = extractelement <1 x float> %46, i32 0
  %48 = insertelement <1 x float> undef, float %47, i32 0
  %49 = shufflevector <1 x float> %42, <1 x float> %48, <1 x i32> <i32 1>
  %50 = extractelement <1 x float> %49, i32 0
  %51 = insertelement <1 x float> undef, float %45, i32 0
  %52 = shufflevector <1 x float> %51, <1 x float> %51, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %53 = shufflevector <4 x float> %28, <4 x float> %52, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  %54 = add i32 %29, 1
  %55 = insertelement <1 x i32> undef, i32 %54, i32 0
  %56 = insertelement <1 x i32> undef, i32 %29, i32 0
  %57 = shufflevector <1 x i32> %56, <1 x i32> %55, <1 x i32> <i32 1>
  %58 = extractelement <1 x i32> %57, i32 0
  br label %25
}
*** IR Dump Before Preliminary module verification ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.AMDGPU.load.const(i32 0)
  %2 = bitcast float %1 to i32
  %3 = call float @llvm.AMDGPU.load.const(i32 4)
  %4 = insertelement <4 x float> undef, float %3, i32 0
  %5 = call float @llvm.AMDGPU.load.const(i32 5)
  %6 = insertelement <4 x float> %4, float %5, i32 1
  %7 = call float @llvm.AMDGPU.load.const(i32 6)
  %8 = insertelement <4 x float> %6, float %7, i32 2
  %9 = call float @llvm.AMDGPU.load.const(i32 7)
  %10 = insertelement <4 x float> %8, float %9, i32 3
  %11 = call float @llvm.R600.load.input(i32 4)
  %12 = insertelement <4 x float> undef, float %11, i32 0
  %13 = call float @llvm.R600.load.input(i32 5)
  %14 = insertelement <4 x float> %12, float %13, i32 1
  %15 = call float @llvm.R600.load.input(i32 6)
  %16 = insertelement <4 x float> %14, float %15, i32 2
  %17 = call float @llvm.R600.load.input(i32 7)
  %18 = insertelement <4 x float> %16, float %17, i32 3
  %19 = shufflevector <4 x float> %10, <4 x float> %10, <1 x i32> <i32 1>
  %20 = extractelement <1 x float> %19, i32 0
  %21 = insertelement <1 x float> undef, float %20, i32 0
  %22 = shufflevector <1 x float> undef, <1 x float> %21, <1 x i32> <i32 1>
  %23 = extractelement <1 x float> %22, i32 0
  %24 = shufflevector <4 x float> undef, <4 x float> %10, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  br label %25

; <label>:25                                      ; preds = %41, %0
  %26 = phi float [ %45, %41 ], [ undef, %0 ]
  %27 = phi float [ %50, %41 ], [ %23, %0 ]
  %28 = phi <4 x float> [ %53, %41 ], [ %24, %0 ]
  %29 = phi i32 [ %58, %41 ], [ 0, %0 ]
  %30 = icmp sge i32 %29, %2
  br i1 %30, label %31, label %41

; <label>:31                                      ; preds = %25
  %32 = shufflevector <4 x float> undef, <4 x float> %18, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %33 = extractelement <4 x float> %28, i32 0
  call void @llvm.AMDGPU.store.output(float %33, i32 8)
  %34 = extractelement <4 x float> %28, i32 1
  call void @llvm.AMDGPU.store.output(float %34, i32 9)
  %35 = extractelement <4 x float> %28, i32 2
  call void @llvm.AMDGPU.store.output(float %35, i32 10)
  %36 = extractelement <4 x float> %28, i32 3
  call void @llvm.AMDGPU.store.output(float %36, i32 11)
  %37 = extractelement <4 x float> %32, i32 0
  call void @llvm.AMDGPU.store.output(float %37, i32 4)
  %38 = extractelement <4 x float> %32, i32 1
  call void @llvm.AMDGPU.store.output(float %38, i32 5)
  %39 = extractelement <4 x float> %32, i32 2
  call void @llvm.AMDGPU.store.output(float %39, i32 6)
  %40 = extractelement <4 x float> %32, i32 3
  call void @llvm.AMDGPU.store.output(float %40, i32 7)
  ret void

; <label>:41                                      ; preds = %25
  %42 = insertelement <1 x float> undef, float %27, i32 0
  %43 = insertelement <1 x float> undef, float %26, i32 0
  %44 = shufflevector <1 x float> %43, <1 x float> %42, <1 x i32> <i32 1>
  %45 = extractelement <1 x float> %44, i32 0
  %46 = shufflevector <4 x float> %28, <4 x float> %28, <1 x i32> <i32 2>
  %47 = extractelement <1 x float> %46, i32 0
  %48 = insertelement <1 x float> undef, float %47, i32 0
  %49 = shufflevector <1 x float> %42, <1 x float> %48, <1 x i32> <i32 1>
  %50 = extractelement <1 x float> %49, i32 0
  %51 = insertelement <1 x float> undef, float %45, i32 0
  %52 = shufflevector <1 x float> %51, <1 x float> %51, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %53 = shufflevector <4 x float> %28, <4 x float> %52, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  %54 = add i32 %29, 1
  %55 = insertelement <1 x i32> undef, i32 %54, i32 0
  %56 = insertelement <1 x i32> undef, i32 %29, i32 0
  %57 = shufflevector <1 x i32> %56, <1 x i32> %55, <1 x i32> <i32 1>
  %58 = extractelement <1 x i32> %57, i32 0
  br label %25
}
*** IR Dump Before Module Verifier ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.AMDGPU.load.const(i32 0)
  %2 = bitcast float %1 to i32
  %3 = call float @llvm.AMDGPU.load.const(i32 4)
  %4 = insertelement <4 x float> undef, float %3, i32 0
  %5 = call float @llvm.AMDGPU.load.const(i32 5)
  %6 = insertelement <4 x float> %4, float %5, i32 1
  %7 = call float @llvm.AMDGPU.load.const(i32 6)
  %8 = insertelement <4 x float> %6, float %7, i32 2
  %9 = call float @llvm.AMDGPU.load.const(i32 7)
  %10 = insertelement <4 x float> %8, float %9, i32 3
  %11 = call float @llvm.R600.load.input(i32 4)
  %12 = insertelement <4 x float> undef, float %11, i32 0
  %13 = call float @llvm.R600.load.input(i32 5)
  %14 = insertelement <4 x float> %12, float %13, i32 1
  %15 = call float @llvm.R600.load.input(i32 6)
  %16 = insertelement <4 x float> %14, float %15, i32 2
  %17 = call float @llvm.R600.load.input(i32 7)
  %18 = insertelement <4 x float> %16, float %17, i32 3
  %19 = shufflevector <4 x float> %10, <4 x float> %10, <1 x i32> <i32 1>
  %20 = extractelement <1 x float> %19, i32 0
  %21 = insertelement <1 x float> undef, float %20, i32 0
  %22 = shufflevector <1 x float> undef, <1 x float> %21, <1 x i32> <i32 1>
  %23 = extractelement <1 x float> %22, i32 0
  %24 = shufflevector <4 x float> undef, <4 x float> %10, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  br label %25

; <label>:25                                      ; preds = %41, %0
  %26 = phi float [ %45, %41 ], [ undef, %0 ]
  %27 = phi float [ %50, %41 ], [ %23, %0 ]
  %28 = phi <4 x float> [ %53, %41 ], [ %24, %0 ]
  %29 = phi i32 [ %58, %41 ], [ 0, %0 ]
  %30 = icmp sge i32 %29, %2
  br i1 %30, label %31, label %41

; <label>:31                                      ; preds = %25
  %32 = shufflevector <4 x float> undef, <4 x float> %18, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %33 = extractelement <4 x float> %28, i32 0
  call void @llvm.AMDGPU.store.output(float %33, i32 8)
  %34 = extractelement <4 x float> %28, i32 1
  call void @llvm.AMDGPU.store.output(float %34, i32 9)
  %35 = extractelement <4 x float> %28, i32 2
  call void @llvm.AMDGPU.store.output(float %35, i32 10)
  %36 = extractelement <4 x float> %28, i32 3
  call void @llvm.AMDGPU.store.output(float %36, i32 11)
  %37 = extractelement <4 x float> %32, i32 0
  call void @llvm.AMDGPU.store.output(float %37, i32 4)
  %38 = extractelement <4 x float> %32, i32 1
  call void @llvm.AMDGPU.store.output(float %38, i32 5)
  %39 = extractelement <4 x float> %32, i32 2
  call void @llvm.AMDGPU.store.output(float %39, i32 6)
  %40 = extractelement <4 x float> %32, i32 3
  call void @llvm.AMDGPU.store.output(float %40, i32 7)
  ret void

; <label>:41                                      ; preds = %25
  %42 = insertelement <1 x float> undef, float %27, i32 0
  %43 = insertelement <1 x float> undef, float %26, i32 0
  %44 = shufflevector <1 x float> %43, <1 x float> %42, <1 x i32> <i32 1>
  %45 = extractelement <1 x float> %44, i32 0
  %46 = shufflevector <4 x float> %28, <4 x float> %28, <1 x i32> <i32 2>
  %47 = extractelement <1 x float> %46, i32 0
  %48 = insertelement <1 x float> undef, float %47, i32 0
  %49 = shufflevector <1 x float> %42, <1 x float> %48, <1 x i32> <i32 1>
  %50 = extractelement <1 x float> %49, i32 0
  %51 = insertelement <1 x float> undef, float %45, i32 0
  %52 = shufflevector <1 x float> %51, <1 x float> %51, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %53 = shufflevector <4 x float> %28, <4 x float> %52, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  %54 = add i32 %29, 1
  %55 = insertelement <1 x i32> undef, i32 %54, i32 0
  %56 = insertelement <1 x i32> undef, i32 %29, i32 0
  %57 = shufflevector <1 x i32> %56, <1 x i32> %55, <1 x i32> <i32 1>
  %58 = extractelement <1 x i32> %57, i32 0
  br label %25
}
# *** IR Dump Before Expand ISel Pseudo-instructions ***:
# Machine code for function main: SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%vreg17<def> = COPY %T1_W; R600_TReg32:%vreg17
%vreg16<def> = COPY %T1_Z; R600_TReg32:%vreg16
%vreg15<def> = COPY %T1_Y; R600_TReg32:%vreg15
%vreg14<def> = COPY %T1_X; R600_TReg32:%vreg14
%vreg18<def> = R600_LOAD_CONST 4; R600_Reg32:%vreg18
%vreg20<def> = IMPLICIT_DEF; R600_Reg128:%vreg20
%vreg19<def,tied1> = INSERT_SUBREG %vreg20<tied0>, %vreg14, sel_x; R600_Reg128:%vreg19,%vreg20 R600_TReg32:%vreg14
%vreg2<def> = R600_LOAD_CONST 5; R600_Reg32:%vreg2
%vreg22<def> = IMPLICIT_DEF; R600_Reg128:%vreg22
%vreg21<def,tied1> = INSERT_SUBREG %vreg22<tied0>, %vreg18<kill>, sel_x; R600_Reg128:%vreg21,%vreg22 R600_Reg32:%vreg18
RESERVE_REG 0
%vreg23<def,tied1> = INSERT_SUBREG %vreg19<tied0>, %vreg15, sel_y; R600_Reg128:%vreg23,%vreg19 R600_TReg32:%vreg15
%vreg24<def,tied1> = INSERT_SUBREG %vreg21<tied0>, %vreg2, sel_y; R600_Reg128:%vreg24,%vreg21 R600_Reg32:%vreg2
%vreg25<def> = R600_LOAD_CONST 6; R600_Reg32:%vreg25
%vreg26<def,tied1> = INSERT_SUBREG %vreg23<tied0>, %vreg16, sel_z; R600_Reg128:%vreg26,%vreg23 R600_TReg32:%vreg16
RESERVE_REG 1
%vreg27<def,tied1> = INSERT_SUBREG %vreg24<tied0>, %vreg25<kill>, sel_z; R600_Reg128:%vreg27,%vreg24 R600_Reg32:%vreg25
%vreg28<def> = R600_LOAD_CONST 7; R600_Reg32:%vreg28
RESERVE_REG 2
%vreg3<def,tied1> = INSERT_SUBREG %vreg27<tied0>, %vreg28<kill>, sel_w; R600_Reg128:%vreg3,%vreg27 R600_Reg32:%vreg28
%vreg1<def,tied1> = INSERT_SUBREG %vreg26<tied0>, %vreg17, sel_w; R600_Reg128:%vreg1,%vreg26 R600_TReg32:%vreg17
%vreg13<def> = MOV_IMM_I32 0; R600_Reg32:%vreg13
%vreg0<def> = R600_LOAD_CONST 0; R600_Reg32:%vreg0
%vreg12<def> = IMPLICIT_DEF; R600_Reg32:%vreg12
RESERVE_REG 3
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
%vreg4<def> = PHI %vreg12, <BB#0>, %vreg8, <BB#3>; R600_Reg32:%vreg4,%vreg12,%vreg8
%vreg5<def> = PHI %vreg2, <BB#0>, %vreg9, <BB#3>; R600_Reg32:%vreg5,%vreg2,%vreg9
%vreg6<def> = PHI %vreg3, <BB#0>, %vreg10, <BB#3>; R600_Reg128:%vreg6,%vreg3,%vreg10
%vreg7<def> = PHI %vreg13, <BB#0>, %vreg11, <BB#3>; R600_Reg32:%vreg7,%vreg13,%vreg11
%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
%vreg30<def> = COPY %vreg29; GPRI32:%vreg30 R600_Reg32:%vreg29
BRANCH_COND_i32 <BB#3>, %vreg30<kill>; GPRI32:%vreg30
BRANCH <BB#2>
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
%T2_X<def> = COPY %vreg39; R600_Reg32:%vreg39
%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
%T2_Y<def> = COPY %vreg40; R600_Reg32:%vreg40
%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
%T2_Z<def> = COPY %vreg41; R600_Reg32:%vreg41
%vreg42<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg42 R600_Reg128:%vreg6
%T2_W<def> = COPY %vreg42; R600_Reg32:%vreg42
%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
%T1_X<def> = COPY %vreg43; R600_Reg32:%vreg43
%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
%T1_Y<def> = COPY %vreg44; R600_Reg32:%vreg44
%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
%T1_Z<def> = COPY %vreg45; R600_Reg32:%vreg45
%vreg46<def> = COPY %vreg1:sel_w; R600_Reg32:%vreg46 R600_Reg128:%vreg1
%T1_W<def> = COPY %vreg46; R600_Reg32:%vreg46
RETURN

BB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
%vreg33<def> = IMPLICIT_DEF; R600_Reg128:%vreg33
%vreg32<def,tied1> = INSERT_SUBREG %vreg33<tied0>, %vreg31<kill>, sel_x; R600_Reg128:%vreg32,%vreg33 R600_Reg32:%vreg31
%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
%vreg35<def,tied1> = INSERT_SUBREG %vreg32<tied0>, %vreg34<kill>, sel_y; R600_Reg128:%vreg35,%vreg32 R600_Reg32:%vreg34
%vreg36<def,tied1> = INSERT_SUBREG %vreg35<tied0>, %vreg5, sel_z; R600_Reg128:%vreg36,%vreg35 R600_Reg32:%vreg5
%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
%vreg10<def,tied1> = INSERT_SUBREG %vreg36<tied0>, %vreg37<kill>, sel_w; R600_Reg128:%vreg10,%vreg36 R600_Reg32:%vreg37
%vreg9<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg9 R600_Reg128:%vreg6
%vreg38<def> = MOV_IMM_I32 1; R600_Reg32:%vreg38
%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
%vreg8<def> = COPY %vreg5; R600_Reg32:%vreg8,%vreg5
BRANCH <BB#1>
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Tail Duplication ***:
# Machine code for function main: SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%vreg17<def> = COPY %T1_W; R600_TReg32:%vreg17
%vreg16<def> = COPY %T1_Z; R600_TReg32:%vreg16
%vreg15<def> = COPY %T1_Y; R600_TReg32:%vreg15
%vreg14<def> = COPY %T1_X; R600_TReg32:%vreg14
%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
%vreg20<def> = IMPLICIT_DEF; R600_Reg128:%vreg20
%vreg19<def,tied1> = INSERT_SUBREG %vreg20<tied0>, %vreg14, sel_x; R600_Reg128:%vreg19,%vreg20 R600_TReg32:%vreg14
%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
%vreg22<def> = IMPLICIT_DEF; R600_Reg128:%vreg22
%vreg21<def,tied1> = INSERT_SUBREG %vreg22<tied0>, %vreg18<kill>, sel_x; R600_Reg128:%vreg21,%vreg22 R600_Reg32:%vreg18
%vreg23<def,tied1> = INSERT_SUBREG %vreg19<tied0>, %vreg15, sel_y; R600_Reg128:%vreg23,%vreg19 R600_TReg32:%vreg15
%vreg24<def,tied1> = INSERT_SUBREG %vreg21<tied0>, %vreg2, sel_y; R600_Reg128:%vreg24,%vreg21 R600_Reg32:%vreg2
%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
%vreg26<def,tied1> = INSERT_SUBREG %vreg23<tied0>, %vreg16, sel_z; R600_Reg128:%vreg26,%vreg23 R600_TReg32:%vreg16
%vreg27<def,tied1> = INSERT_SUBREG %vreg24<tied0>, %vreg25<kill>, sel_z; R600_Reg128:%vreg27,%vreg24 R600_Reg32:%vreg25
%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
%vreg3<def,tied1> = INSERT_SUBREG %vreg27<tied0>, %vreg28<kill>, sel_w; R600_Reg128:%vreg3,%vreg27 R600_Reg32:%vreg28
%vreg1<def,tied1> = INSERT_SUBREG %vreg26<tied0>, %vreg17, sel_w; R600_Reg128:%vreg1,%vreg26 R600_TReg32:%vreg17
%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
%vreg12<def> = IMPLICIT_DEF; R600_Reg32:%vreg12
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
%vreg4<def> = PHI %vreg12, <BB#0>, %vreg8, <BB#3>; R600_Reg32:%vreg4,%vreg12,%vreg8
%vreg5<def> = PHI %vreg2, <BB#0>, %vreg9, <BB#3>; R600_Reg32:%vreg5,%vreg2,%vreg9
%vreg6<def> = PHI %vreg3, <BB#0>, %vreg10, <BB#3>; R600_Reg128:%vreg6,%vreg3,%vreg10
%vreg7<def> = PHI %vreg13, <BB#0>, %vreg11, <BB#3>; R600_Reg32:%vreg7,%vreg13,%vreg11
%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
%vreg30<def> = COPY %vreg29; GPRI32:%vreg30 R600_Reg32:%vreg29
%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; GPRI32:%vreg30
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
JUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
%T2_X<def> = COPY %vreg39; R600_Reg32:%vreg39
%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
%T2_Y<def> = COPY %vreg40; R600_Reg32:%vreg40
%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
%T2_Z<def> = COPY %vreg41; R600_Reg32:%vreg41
%vreg42<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg42 R600_Reg128:%vreg6
%T2_W<def> = COPY %vreg42; R600_Reg32:%vreg42
%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
%T1_X<def> = COPY %vreg43; R600_Reg32:%vreg43
%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
%T1_Y<def> = COPY %vreg44; R600_Reg32:%vreg44
%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
%T1_Z<def> = COPY %vreg45; R600_Reg32:%vreg45
%vreg46<def> = COPY %vreg1:sel_w; R600_Reg32:%vreg46 R600_Reg128:%vreg1
%T1_W<def> = COPY %vreg46; R600_Reg32:%vreg46
RETURN

BB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
%vreg33<def> = IMPLICIT_DEF; R600_Reg128:%vreg33
%vreg32<def,tied1> = INSERT_SUBREG %vreg33<tied0>, %vreg31<kill>, sel_x; R600_Reg128:%vreg32,%vreg33 R600_Reg32:%vreg31
%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
%vreg35<def,tied1> = INSERT_SUBREG %vreg32<tied0>, %vreg34<kill>, sel_y; R600_Reg128:%vreg35,%vreg32 R600_Reg32:%vreg34
%vreg36<def,tied1> = INSERT_SUBREG %vreg35<tied0>, %vreg5, sel_z; R600_Reg128:%vreg36,%vreg35 R600_Reg32:%vreg5
%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
%vreg10<def,tied1> = INSERT_SUBREG %vreg36<tied0>, %vreg37<kill>, sel_w; R600_Reg128:%vreg10,%vreg36 R600_Reg32:%vreg37
%vreg9<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg9 R600_Reg128:%vreg6
%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
%vreg8<def> = COPY %vreg5; R600_Reg32:%vreg8,%vreg5
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Optimize machine instruction PHIs ***:
# Machine code for function main: SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%vreg17<def> = COPY %T1_W; R600_TReg32:%vreg17
%vreg16<def> = COPY %T1_Z; R600_TReg32:%vreg16
%vreg15<def> = COPY %T1_Y; R600_TReg32:%vreg15
%vreg14<def> = COPY %T1_X; R600_TReg32:%vreg14
%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
%vreg20<def> = IMPLICIT_DEF; R600_Reg128:%vreg20
%vreg19<def,tied1> = INSERT_SUBREG %vreg20<tied0>, %vreg14, sel_x; R600_Reg128:%vreg19,%vreg20 R600_TReg32:%vreg14
%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
%vreg22<def> = IMPLICIT_DEF; R600_Reg128:%vreg22
%vreg21<def,tied1> = INSERT_SUBREG %vreg22<tied0>, %vreg18<kill>, sel_x; R600_Reg128:%vreg21,%vreg22 R600_Reg32:%vreg18
%vreg23<def,tied1> = INSERT_SUBREG %vreg19<tied0>, %vreg15, sel_y; R600_Reg128:%vreg23,%vreg19 R600_TReg32:%vreg15
%vreg24<def,tied1> = INSERT_SUBREG %vreg21<tied0>, %vreg2, sel_y; R600_Reg128:%vreg24,%vreg21 R600_Reg32:%vreg2
%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
%vreg26<def,tied1> = INSERT_SUBREG %vreg23<tied0>, %vreg16, sel_z; R600_Reg128:%vreg26,%vreg23 R600_TReg32:%vreg16
%vreg27<def,tied1> = INSERT_SUBREG %vreg24<tied0>, %vreg25<kill>, sel_z; R600_Reg128:%vreg27,%vreg24 R600_Reg32:%vreg25
%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
%vreg3<def,tied1> = INSERT_SUBREG %vreg27<tied0>, %vreg28<kill>, sel_w; R600_Reg128:%vreg3,%vreg27 R600_Reg32:%vreg28
%vreg1<def,tied1> = INSERT_SUBREG %vreg26<tied0>, %vreg17, sel_w; R600_Reg128:%vreg1,%vreg26 R600_TReg32:%vreg17
%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
%vreg12<def> = IMPLICIT_DEF; R600_Reg32:%vreg12
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
%vreg4<def> = PHI %vreg12, <BB#0>, %vreg8, <BB#3>; R600_Reg32:%vreg4,%vreg12,%vreg8
%vreg5<def> = PHI %vreg2, <BB#0>, %vreg9, <BB#3>; R600_Reg32:%vreg5,%vreg2,%vreg9
%vreg6<def> = PHI %vreg3, <BB#0>, %vreg10, <BB#3>; R600_Reg128:%vreg6,%vreg3,%vreg10
%vreg7<def> = PHI %vreg13, <BB#0>, %vreg11, <BB#3>; R600_Reg32:%vreg7,%vreg13,%vreg11
%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
%vreg30<def> = COPY %vreg29; GPRI32:%vreg30 R600_Reg32:%vreg29
%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; GPRI32:%vreg30
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
JUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
%T2_X<def> = COPY %vreg39; R600_Reg32:%vreg39
%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
%T2_Y<def> = COPY %vreg40; R600_Reg32:%vreg40
%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
%T2_Z<def> = COPY %vreg41; R600_Reg32:%vreg41
%vreg42<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg42 R600_Reg128:%vreg6
%T2_W<def> = COPY %vreg42; R600_Reg32:%vreg42
%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
%T1_X<def> = COPY %vreg43; R600_Reg32:%vreg43
%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
%T1_Y<def> = COPY %vreg44; R600_Reg32:%vreg44
%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
%T1_Z<def> = COPY %vreg45; R600_Reg32:%vreg45
%vreg46<def> = COPY %vreg1:sel_w; R600_Reg32:%vreg46 R600_Reg128:%vreg1
%T1_W<def> = COPY %vreg46; R600_Reg32:%vreg46
RETURN

BB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
%vreg33<def> = IMPLICIT_DEF; R600_Reg128:%vreg33
%vreg32<def,tied1> = INSERT_SUBREG %vreg33<tied0>, %vreg31<kill>, sel_x; R600_Reg128:%vreg32,%vreg33 R600_Reg32:%vreg31
%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
%vreg35<def,tied1> = INSERT_SUBREG %vreg32<tied0>, %vreg34<kill>, sel_y; R600_Reg128:%vreg35,%vreg32 R600_Reg32:%vreg34
%vreg36<def,tied1> = INSERT_SUBREG %vreg35<tied0>, %vreg5, sel_z; R600_Reg128:%vreg36,%vreg35 R600_Reg32:%vreg5
%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
%vreg10<def,tied1> = INSERT_SUBREG %vreg36<tied0>, %vreg37<kill>, sel_w; R600_Reg128:%vreg10,%vreg36 R600_Reg32:%vreg37
%vreg9<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg9 R600_Reg128:%vreg6
%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
%vreg8<def> = COPY %vreg5; R600_Reg32:%vreg8,%vreg5
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Slot index numbering ***:
# Machine code for function main: SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%vreg17<def> = COPY %T1_W; R600_TReg32:%vreg17
%vreg16<def> = COPY %T1_Z; R600_TReg32:%vreg16
%vreg15<def> = COPY %T1_Y; R600_TReg32:%vreg15
%vreg14<def> = COPY %T1_X; R600_TReg32:%vreg14
%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
%vreg20<def> = IMPLICIT_DEF; R600_Reg128:%vreg20
%vreg19<def,tied1> = INSERT_SUBREG %vreg20<tied0>, %vreg14, sel_x; R600_Reg128:%vreg19,%vreg20 R600_TReg32:%vreg14
%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
%vreg22<def> = IMPLICIT_DEF; R600_Reg128:%vreg22
%vreg21<def,tied1> = INSERT_SUBREG %vreg22<tied0>, %vreg18<kill>, sel_x; R600_Reg128:%vreg21,%vreg22 R600_Reg32:%vreg18
%vreg23<def,tied1> = INSERT_SUBREG %vreg19<tied0>, %vreg15, sel_y; R600_Reg128:%vreg23,%vreg19 R600_TReg32:%vreg15
%vreg24<def,tied1> = INSERT_SUBREG %vreg21<tied0>, %vreg2, sel_y; R600_Reg128:%vreg24,%vreg21 R600_Reg32:%vreg2
%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
%vreg26<def,tied1> = INSERT_SUBREG %vreg23<tied0>, %vreg16, sel_z; R600_Reg128:%vreg26,%vreg23 R600_TReg32:%vreg16
%vreg27<def,tied1> = INSERT_SUBREG %vreg24<tied0>, %vreg25<kill>, sel_z; R600_Reg128:%vreg27,%vreg24 R600_Reg32:%vreg25
%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
%vreg3<def,tied1> = INSERT_SUBREG %vreg27<tied0>, %vreg28<kill>, sel_w; R600_Reg128:%vreg3,%vreg27 R600_Reg32:%vreg28
%vreg1<def,tied1> = INSERT_SUBREG %vreg26<tied0>, %vreg17, sel_w; R600_Reg128:%vreg1,%vreg26 R600_TReg32:%vreg17
%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
%vreg12<def> = IMPLICIT_DEF; R600_Reg32:%vreg12
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
%vreg5<def> = PHI %vreg2, <BB#0>, %vreg9, <BB#3>; R600_Reg32:%vreg5,%vreg2,%vreg9
%vreg6<def> = PHI %vreg3, <BB#0>, %vreg10, <BB#3>; R600_Reg128:%vreg6,%vreg3,%vreg10
%vreg7<def> = PHI %vreg13, <BB#0>, %vreg11, <BB#3>; R600_Reg32:%vreg7,%vreg13,%vreg11
%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
%vreg30<def> = COPY %vreg29; GPRI32:%vreg30 R600_Reg32:%vreg29
%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; GPRI32:%vreg30
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
JUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
%T2_X<def> = COPY %vreg39; R600_Reg32:%vreg39
%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
%T2_Y<def> = COPY %vreg40; R600_Reg32:%vreg40
%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
%T2_Z<def> = COPY %vreg41; R600_Reg32:%vreg41
%vreg42<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg42 R600_Reg128:%vreg6
%T2_W<def> = COPY %vreg42; R600_Reg32:%vreg42
%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
%T1_X<def> = COPY %vreg43; R600_Reg32:%vreg43
%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
%T1_Y<def> = COPY %vreg44; R600_Reg32:%vreg44
%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
%T1_Z<def> = COPY %vreg45; R600_Reg32:%vreg45
%vreg46<def> = COPY %vreg1:sel_w; R600_Reg32:%vreg46 R600_Reg128:%vreg1
%T1_W<def> = COPY %vreg46; R600_Reg32:%vreg46
RETURN

BB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
%vreg33<def> = IMPLICIT_DEF; R600_Reg128:%vreg33
%vreg32<def,tied1> = INSERT_SUBREG %vreg33<tied0>, %vreg31<kill>, sel_x; R600_Reg128:%vreg32,%vreg33 R600_Reg32:%vreg31
%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
%vreg35<def,tied1> = INSERT_SUBREG %vreg32<tied0>, %vreg34<kill>, sel_y; R600_Reg128:%vreg35,%vreg32 R600_Reg32:%vreg34
%vreg36<def,tied1> = INSERT_SUBREG %vreg35<tied0>, %vreg5, sel_z; R600_Reg128:%vreg36,%vreg35 R600_Reg32:%vreg5
%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
%vreg10<def,tied1> = INSERT_SUBREG %vreg36<tied0>, %vreg37<kill>, sel_w; R600_Reg128:%vreg10,%vreg36 R600_Reg32:%vreg37
%vreg9<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg9 R600_Reg128:%vreg6
%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
%vreg8<def> = COPY %vreg5; R600_Reg32:%vreg8,%vreg5
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Merge disjoint stack slots ***:
# Machine code for function main: SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

0BBB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
16B%vreg17<def> = COPY %T1_W; R600_TReg32:%vreg17
32B%vreg16<def> = COPY %T1_Z; R600_TReg32:%vreg16
48B%vreg15<def> = COPY %T1_Y; R600_TReg32:%vreg15
64B%vreg14<def> = COPY %T1_X; R600_TReg32:%vreg14
80B%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
96B%vreg20<def> = IMPLICIT_DEF; R600_Reg128:%vreg20
112B%vreg19<def,tied1> = INSERT_SUBREG %vreg20<tied0>, %vreg14, sel_x; R600_Reg128:%vreg19,%vreg20 R600_TReg32:%vreg14
128B%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
144B%vreg22<def> = IMPLICIT_DEF; R600_Reg128:%vreg22
160B%vreg21<def,tied1> = INSERT_SUBREG %vreg22<tied0>, %vreg18<kill>, sel_x; R600_Reg128:%vreg21,%vreg22 R600_Reg32:%vreg18
176B%vreg23<def,tied1> = INSERT_SUBREG %vreg19<tied0>, %vreg15, sel_y; R600_Reg128:%vreg23,%vreg19 R600_TReg32:%vreg15
192B%vreg24<def,tied1> = INSERT_SUBREG %vreg21<tied0>, %vreg2, sel_y; R600_Reg128:%vreg24,%vreg21 R600_Reg32:%vreg2
208B%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
224B%vreg26<def,tied1> = INSERT_SUBREG %vreg23<tied0>, %vreg16, sel_z; R600_Reg128:%vreg26,%vreg23 R600_TReg32:%vreg16
240B%vreg27<def,tied1> = INSERT_SUBREG %vreg24<tied0>, %vreg25<kill>, sel_z; R600_Reg128:%vreg27,%vreg24 R600_Reg32:%vreg25
256B%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
272B%vreg3<def,tied1> = INSERT_SUBREG %vreg27<tied0>, %vreg28<kill>, sel_w; R600_Reg128:%vreg3,%vreg27 R600_Reg32:%vreg28
288B%vreg1<def,tied1> = INSERT_SUBREG %vreg26<tied0>, %vreg17, sel_w; R600_Reg128:%vreg1,%vreg26 R600_TReg32:%vreg17
304B%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
320B%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
336B%vreg12<def> = IMPLICIT_DEF; R600_Reg32:%vreg12
    Successors according to CFG: BB#1

352BBB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
368B%vreg5<def> = PHI %vreg2, <BB#0>, %vreg9, <BB#3>; R600_Reg32:%vreg5,%vreg2,%vreg9
384B%vreg6<def> = PHI %vreg3, <BB#0>, %vreg10, <BB#3>; R600_Reg128:%vreg6,%vreg3,%vreg10
400B%vreg7<def> = PHI %vreg13, <BB#0>, %vreg11, <BB#3>; R600_Reg32:%vreg7,%vreg13,%vreg11
416B%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
432B%vreg30<def> = COPY %vreg29; GPRI32:%vreg30 R600_Reg32:%vreg29
448B%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; GPRI32:%vreg30
464BJUMP <BB#3>, pred:%PREDICATE_BIT<kill>
480BJUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

496BBB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
512B%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
528B%T2_X<def> = COPY %vreg39; R600_Reg32:%vreg39
544B%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
560B%T2_Y<def> = COPY %vreg40; R600_Reg32:%vreg40
576B%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
592B%T2_Z<def> = COPY %vreg41; R600_Reg32:%vreg41
608B%vreg42<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg42 R600_Reg128:%vreg6
624B%T2_W<def> = COPY %vreg42; R600_Reg32:%vreg42
640B%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
656B%T1_X<def> = COPY %vreg43; R600_Reg32:%vreg43
672B%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
688B%T1_Y<def> = COPY %vreg44; R600_Reg32:%vreg44
704B%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
720B%T1_Z<def> = COPY %vreg45; R600_Reg32:%vreg45
736B%vreg46<def> = COPY %vreg1:sel_w; R600_Reg32:%vreg46 R600_Reg128:%vreg1
752B%T1_W<def> = COPY %vreg46; R600_Reg32:%vreg46
768BRETURN

784BBB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
800B%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
816B%vreg33<def> = IMPLICIT_DEF; R600_Reg128:%vreg33
832B%vreg32<def,tied1> = INSERT_SUBREG %vreg33<tied0>, %vreg31<kill>, sel_x; R600_Reg128:%vreg32,%vreg33 R600_Reg32:%vreg31
848B%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
864B%vreg35<def,tied1> = INSERT_SUBREG %vreg32<tied0>, %vreg34<kill>, sel_y; R600_Reg128:%vreg35,%vreg32 R600_Reg32:%vreg34
880B%vreg36<def,tied1> = INSERT_SUBREG %vreg35<tied0>, %vreg5, sel_z; R600_Reg128:%vreg36,%vreg35 R600_Reg32:%vreg5
896B%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
912B%vreg10<def,tied1> = INSERT_SUBREG %vreg36<tied0>, %vreg37<kill>, sel_w; R600_Reg128:%vreg10,%vreg36 R600_Reg32:%vreg37
928B%vreg9<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg9 R600_Reg128:%vreg6
944B%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
960B%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
976B%vreg8<def> = COPY %vreg5; R600_Reg32:%vreg8,%vreg5
992BJUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Local Stack Slot Allocation ***:
# Machine code for function main: SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%vreg17<def> = COPY %T1_W; R600_TReg32:%vreg17
%vreg16<def> = COPY %T1_Z; R600_TReg32:%vreg16
%vreg15<def> = COPY %T1_Y; R600_TReg32:%vreg15
%vreg14<def> = COPY %T1_X; R600_TReg32:%vreg14
%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
%vreg20<def> = IMPLICIT_DEF; R600_Reg128:%vreg20
%vreg19<def,tied1> = INSERT_SUBREG %vreg20<tied0>, %vreg14, sel_x; R600_Reg128:%vreg19,%vreg20 R600_TReg32:%vreg14
%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
%vreg22<def> = IMPLICIT_DEF; R600_Reg128:%vreg22
%vreg21<def,tied1> = INSERT_SUBREG %vreg22<tied0>, %vreg18<kill>, sel_x; R600_Reg128:%vreg21,%vreg22 R600_Reg32:%vreg18
%vreg23<def,tied1> = INSERT_SUBREG %vreg19<tied0>, %vreg15, sel_y; R600_Reg128:%vreg23,%vreg19 R600_TReg32:%vreg15
%vreg24<def,tied1> = INSERT_SUBREG %vreg21<tied0>, %vreg2, sel_y; R600_Reg128:%vreg24,%vreg21 R600_Reg32:%vreg2
%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
%vreg26<def,tied1> = INSERT_SUBREG %vreg23<tied0>, %vreg16, sel_z; R600_Reg128:%vreg26,%vreg23 R600_TReg32:%vreg16
%vreg27<def,tied1> = INSERT_SUBREG %vreg24<tied0>, %vreg25<kill>, sel_z; R600_Reg128:%vreg27,%vreg24 R600_Reg32:%vreg25
%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
%vreg3<def,tied1> = INSERT_SUBREG %vreg27<tied0>, %vreg28<kill>, sel_w; R600_Reg128:%vreg3,%vreg27 R600_Reg32:%vreg28
%vreg1<def,tied1> = INSERT_SUBREG %vreg26<tied0>, %vreg17, sel_w; R600_Reg128:%vreg1,%vreg26 R600_TReg32:%vreg17
%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
%vreg12<def> = IMPLICIT_DEF; R600_Reg32:%vreg12
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
%vreg5<def> = PHI %vreg2, <BB#0>, %vreg9, <BB#3>; R600_Reg32:%vreg5,%vreg2,%vreg9
%vreg6<def> = PHI %vreg3, <BB#0>, %vreg10, <BB#3>; R600_Reg128:%vreg6,%vreg3,%vreg10
%vreg7<def> = PHI %vreg13, <BB#0>, %vreg11, <BB#3>; R600_Reg32:%vreg7,%vreg13,%vreg11
%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
%vreg30<def> = COPY %vreg29; GPRI32:%vreg30 R600_Reg32:%vreg29
%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; GPRI32:%vreg30
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
JUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
%T2_X<def> = COPY %vreg39; R600_Reg32:%vreg39
%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
%T2_Y<def> = COPY %vreg40; R600_Reg32:%vreg40
%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
%T2_Z<def> = COPY %vreg41; R600_Reg32:%vreg41
%vreg42<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg42 R600_Reg128:%vreg6
%T2_W<def> = COPY %vreg42; R600_Reg32:%vreg42
%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
%T1_X<def> = COPY %vreg43; R600_Reg32:%vreg43
%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
%T1_Y<def> = COPY %vreg44; R600_Reg32:%vreg44
%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
%T1_Z<def> = COPY %vreg45; R600_Reg32:%vreg45
%vreg46<def> = COPY %vreg1:sel_w; R600_Reg32:%vreg46 R600_Reg128:%vreg1
%T1_W<def> = COPY %vreg46; R600_Reg32:%vreg46
RETURN

BB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
%vreg33<def> = IMPLICIT_DEF; R600_Reg128:%vreg33
%vreg32<def,tied1> = INSERT_SUBREG %vreg33<tied0>, %vreg31<kill>, sel_x; R600_Reg128:%vreg32,%vreg33 R600_Reg32:%vreg31
%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
%vreg35<def,tied1> = INSERT_SUBREG %vreg32<tied0>, %vreg34<kill>, sel_y; R600_Reg128:%vreg35,%vreg32 R600_Reg32:%vreg34
%vreg36<def,tied1> = INSERT_SUBREG %vreg35<tied0>, %vreg5, sel_z; R600_Reg128:%vreg36,%vreg35 R600_Reg32:%vreg5
%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
%vreg10<def,tied1> = INSERT_SUBREG %vreg36<tied0>, %vreg37<kill>, sel_w; R600_Reg128:%vreg10,%vreg36 R600_Reg32:%vreg37
%vreg9<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg9 R600_Reg128:%vreg6
%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
%vreg8<def> = COPY %vreg5; R600_Reg32:%vreg8,%vreg5
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Remove dead machine instructions ***:
# Machine code for function main: SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%vreg17<def> = COPY %T1_W; R600_TReg32:%vreg17
%vreg16<def> = COPY %T1_Z; R600_TReg32:%vreg16
%vreg15<def> = COPY %T1_Y; R600_TReg32:%vreg15
%vreg14<def> = COPY %T1_X; R600_TReg32:%vreg14
%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
%vreg20<def> = IMPLICIT_DEF; R600_Reg128:%vreg20
%vreg19<def,tied1> = INSERT_SUBREG %vreg20<tied0>, %vreg14, sel_x; R600_Reg128:%vreg19,%vreg20 R600_TReg32:%vreg14
%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
%vreg22<def> = IMPLICIT_DEF; R600_Reg128:%vreg22
%vreg21<def,tied1> = INSERT_SUBREG %vreg22<tied0>, %vreg18<kill>, sel_x; R600_Reg128:%vreg21,%vreg22 R600_Reg32:%vreg18
%vreg23<def,tied1> = INSERT_SUBREG %vreg19<tied0>, %vreg15, sel_y; R600_Reg128:%vreg23,%vreg19 R600_TReg32:%vreg15
%vreg24<def,tied1> = INSERT_SUBREG %vreg21<tied0>, %vreg2, sel_y; R600_Reg128:%vreg24,%vreg21 R600_Reg32:%vreg2
%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
%vreg26<def,tied1> = INSERT_SUBREG %vreg23<tied0>, %vreg16, sel_z; R600_Reg128:%vreg26,%vreg23 R600_TReg32:%vreg16
%vreg27<def,tied1> = INSERT_SUBREG %vreg24<tied0>, %vreg25<kill>, sel_z; R600_Reg128:%vreg27,%vreg24 R600_Reg32:%vreg25
%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
%vreg3<def,tied1> = INSERT_SUBREG %vreg27<tied0>, %vreg28<kill>, sel_w; R600_Reg128:%vreg3,%vreg27 R600_Reg32:%vreg28
%vreg1<def,tied1> = INSERT_SUBREG %vreg26<tied0>, %vreg17, sel_w; R600_Reg128:%vreg1,%vreg26 R600_TReg32:%vreg17
%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
%vreg12<def> = IMPLICIT_DEF; R600_Reg32:%vreg12
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
%vreg5<def> = PHI %vreg2, <BB#0>, %vreg9, <BB#3>; R600_Reg32:%vreg5,%vreg2,%vreg9
%vreg6<def> = PHI %vreg3, <BB#0>, %vreg10, <BB#3>; R600_Reg128:%vreg6,%vreg3,%vreg10
%vreg7<def> = PHI %vreg13, <BB#0>, %vreg11, <BB#3>; R600_Reg32:%vreg7,%vreg13,%vreg11
%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
%vreg30<def> = COPY %vreg29; GPRI32:%vreg30 R600_Reg32:%vreg29
%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; GPRI32:%vreg30
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
JUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
%T2_X<def> = COPY %vreg39; R600_Reg32:%vreg39
%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
%T2_Y<def> = COPY %vreg40; R600_Reg32:%vreg40
%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
%T2_Z<def> = COPY %vreg41; R600_Reg32:%vreg41
%vreg42<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg42 R600_Reg128:%vreg6
%T2_W<def> = COPY %vreg42; R600_Reg32:%vreg42
%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
%T1_X<def> = COPY %vreg43; R600_Reg32:%vreg43
%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
%T1_Y<def> = COPY %vreg44; R600_Reg32:%vreg44
%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
%T1_Z<def> = COPY %vreg45; R600_Reg32:%vreg45
%vreg46<def> = COPY %vreg1:sel_w; R600_Reg32:%vreg46 R600_Reg128:%vreg1
%T1_W<def> = COPY %vreg46; R600_Reg32:%vreg46
RETURN

BB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
%vreg33<def> = IMPLICIT_DEF; R600_Reg128:%vreg33
%vreg32<def,tied1> = INSERT_SUBREG %vreg33<tied0>, %vreg31<kill>, sel_x; R600_Reg128:%vreg32,%vreg33 R600_Reg32:%vreg31
%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
%vreg35<def,tied1> = INSERT_SUBREG %vreg32<tied0>, %vreg34<kill>, sel_y; R600_Reg128:%vreg35,%vreg32 R600_Reg32:%vreg34
%vreg36<def,tied1> = INSERT_SUBREG %vreg35<tied0>, %vreg5, sel_z; R600_Reg128:%vreg36,%vreg35 R600_Reg32:%vreg5
%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
%vreg10<def,tied1> = INSERT_SUBREG %vreg36<tied0>, %vreg37<kill>, sel_w; R600_Reg128:%vreg10,%vreg36 R600_Reg32:%vreg37
%vreg9<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg9 R600_Reg128:%vreg6
%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
%vreg8<def> = COPY %vreg5; R600_Reg32:%vreg8,%vreg5
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Machine Loop Invariant Code Motion ***:
# Machine code for function main: SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%vreg17<def> = COPY %T1_W; R600_TReg32:%vreg17
%vreg16<def> = COPY %T1_Z; R600_TReg32:%vreg16
%vreg15<def> = COPY %T1_Y; R600_TReg32:%vreg15
%vreg14<def> = COPY %T1_X; R600_TReg32:%vreg14
%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
%vreg20<def> = IMPLICIT_DEF; R600_Reg128:%vreg20
%vreg19<def,tied1> = INSERT_SUBREG %vreg20<tied0>, %vreg14, sel_x; R600_Reg128:%vreg19,%vreg20 R600_TReg32:%vreg14
%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
%vreg22<def> = IMPLICIT_DEF; R600_Reg128:%vreg22
%vreg21<def,tied1> = INSERT_SUBREG %vreg22<tied0>, %vreg18<kill>, sel_x; R600_Reg128:%vreg21,%vreg22 R600_Reg32:%vreg18
%vreg23<def,tied1> = INSERT_SUBREG %vreg19<tied0>, %vreg15, sel_y; R600_Reg128:%vreg23,%vreg19 R600_TReg32:%vreg15
%vreg24<def,tied1> = INSERT_SUBREG %vreg21<tied0>, %vreg2, sel_y; R600_Reg128:%vreg24,%vreg21 R600_Reg32:%vreg2
%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
%vreg26<def,tied1> = INSERT_SUBREG %vreg23<tied0>, %vreg16, sel_z; R600_Reg128:%vreg26,%vreg23 R600_TReg32:%vreg16
%vreg27<def,tied1> = INSERT_SUBREG %vreg24<tied0>, %vreg25<kill>, sel_z; R600_Reg128:%vreg27,%vreg24 R600_Reg32:%vreg25
%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
%vreg3<def,tied1> = INSERT_SUBREG %vreg27<tied0>, %vreg28<kill>, sel_w; R600_Reg128:%vreg3,%vreg27 R600_Reg32:%vreg28
%vreg1<def,tied1> = INSERT_SUBREG %vreg26<tied0>, %vreg17, sel_w; R600_Reg128:%vreg1,%vreg26 R600_TReg32:%vreg17
%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
%vreg5<def> = PHI %vreg2, <BB#0>, %vreg9, <BB#3>; R600_Reg32:%vreg5,%vreg2,%vreg9
%vreg6<def> = PHI %vreg3, <BB#0>, %vreg10, <BB#3>; R600_Reg128:%vreg6,%vreg3,%vreg10
%vreg7<def> = PHI %vreg13, <BB#0>, %vreg11, <BB#3>; R600_Reg32:%vreg7,%vreg13,%vreg11
%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
%vreg30<def> = COPY %vreg29; GPRI32:%vreg30 R600_Reg32:%vreg29
%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; GPRI32:%vreg30
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
JUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
%T2_X<def> = COPY %vreg39; R600_Reg32:%vreg39
%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
%T2_Y<def> = COPY %vreg40; R600_Reg32:%vreg40
%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
%T2_Z<def> = COPY %vreg41; R600_Reg32:%vreg41
%vreg42<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg42 R600_Reg128:%vreg6
%T2_W<def> = COPY %vreg42; R600_Reg32:%vreg42
%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
%T1_X<def> = COPY %vreg43; R600_Reg32:%vreg43
%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
%T1_Y<def> = COPY %vreg44; R600_Reg32:%vreg44
%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
%T1_Z<def> = COPY %vreg45; R600_Reg32:%vreg45
%vreg46<def> = COPY %vreg1:sel_w; R600_Reg32:%vreg46 R600_Reg128:%vreg1
%T1_W<def> = COPY %vreg46; R600_Reg32:%vreg46
RETURN

BB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
%vreg33<def> = IMPLICIT_DEF; R600_Reg128:%vreg33
%vreg32<def,tied1> = INSERT_SUBREG %vreg33<tied0>, %vreg31<kill>, sel_x; R600_Reg128:%vreg32,%vreg33 R600_Reg32:%vreg31
%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
%vreg35<def,tied1> = INSERT_SUBREG %vreg32<tied0>, %vreg34<kill>, sel_y; R600_Reg128:%vreg35,%vreg32 R600_Reg32:%vreg34
%vreg36<def,tied1> = INSERT_SUBREG %vreg35<tied0>, %vreg5, sel_z; R600_Reg128:%vreg36,%vreg35 R600_Reg32:%vreg5
%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
%vreg10<def,tied1> = INSERT_SUBREG %vreg36<tied0>, %vreg37<kill>, sel_w; R600_Reg128:%vreg10,%vreg36 R600_Reg32:%vreg37
%vreg9<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg9 R600_Reg128:%vreg6
%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Machine Common Subexpression Elimination ***:
# Machine code for function main: SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%vreg17<def> = COPY %T1_W; R600_TReg32:%vreg17
%vreg16<def> = COPY %T1_Z; R600_TReg32:%vreg16
%vreg15<def> = COPY %T1_Y; R600_TReg32:%vreg15
%vreg14<def> = COPY %T1_X; R600_TReg32:%vreg14
%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
%vreg20<def> = IMPLICIT_DEF; R600_Reg128:%vreg20
%vreg19<def,tied1> = INSERT_SUBREG %vreg20<tied0>, %vreg14, sel_x; R600_Reg128:%vreg19,%vreg20 R600_TReg32:%vreg14
%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
%vreg22<def> = IMPLICIT_DEF; R600_Reg128:%vreg22
%vreg21<def,tied1> = INSERT_SUBREG %vreg22<tied0>, %vreg18<kill>, sel_x; R600_Reg128:%vreg21,%vreg22 R600_Reg32:%vreg18
%vreg23<def,tied1> = INSERT_SUBREG %vreg19<tied0>, %vreg15, sel_y; R600_Reg128:%vreg23,%vreg19 R600_TReg32:%vreg15
%vreg24<def,tied1> = INSERT_SUBREG %vreg21<tied0>, %vreg2, sel_y; R600_Reg128:%vreg24,%vreg21 R600_Reg32:%vreg2
%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
%vreg26<def,tied1> = INSERT_SUBREG %vreg23<tied0>, %vreg16, sel_z; R600_Reg128:%vreg26,%vreg23 R600_TReg32:%vreg16
%vreg27<def,tied1> = INSERT_SUBREG %vreg24<tied0>, %vreg25<kill>, sel_z; R600_Reg128:%vreg27,%vreg24 R600_Reg32:%vreg25
%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
%vreg3<def,tied1> = INSERT_SUBREG %vreg27<tied0>, %vreg28<kill>, sel_w; R600_Reg128:%vreg3,%vreg27 R600_Reg32:%vreg28
%vreg1<def,tied1> = INSERT_SUBREG %vreg26<tied0>, %vreg17, sel_w; R600_Reg128:%vreg1,%vreg26 R600_TReg32:%vreg17
%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
%vreg33<def> = IMPLICIT_DEF; R600_Reg128:%vreg33
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
%vreg5<def> = PHI %vreg2, <BB#0>, %vreg9, <BB#3>; R600_Reg32:%vreg5,%vreg2,%vreg9
%vreg6<def> = PHI %vreg3, <BB#0>, %vreg10, <BB#3>; R600_Reg128:%vreg6,%vreg3,%vreg10
%vreg7<def> = PHI %vreg13, <BB#0>, %vreg11, <BB#3>; R600_Reg32:%vreg7,%vreg13,%vreg11
%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
%vreg30<def> = COPY %vreg29; GPRI32:%vreg30 R600_Reg32:%vreg29
%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; GPRI32:%vreg30
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
JUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
%T2_X<def> = COPY %vreg39; R600_Reg32:%vreg39
%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
%T2_Y<def> = COPY %vreg40; R600_Reg32:%vreg40
%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
%T2_Z<def> = COPY %vreg41; R600_Reg32:%vreg41
%vreg42<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg42 R600_Reg128:%vreg6
%T2_W<def> = COPY %vreg42; R600_Reg32:%vreg42
%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
%T1_X<def> = COPY %vreg43; R600_Reg32:%vreg43
%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
%T1_Y<def> = COPY %vreg44; R600_Reg32:%vreg44
%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
%T1_Z<def> = COPY %vreg45; R600_Reg32:%vreg45
%vreg46<def> = COPY %vreg1:sel_w; R600_Reg32:%vreg46 R600_Reg128:%vreg1
%T1_W<def> = COPY %vreg46; R600_Reg32:%vreg46
RETURN

BB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
%vreg32<def,tied1> = INSERT_SUBREG %vreg33<tied0>, %vreg31<kill>, sel_x; R600_Reg128:%vreg32,%vreg33 R600_Reg32:%vreg31
%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
%vreg35<def,tied1> = INSERT_SUBREG %vreg32<tied0>, %vreg34<kill>, sel_y; R600_Reg128:%vreg35,%vreg32 R600_Reg32:%vreg34
%vreg36<def,tied1> = INSERT_SUBREG %vreg35<tied0>, %vreg5, sel_z; R600_Reg128:%vreg36,%vreg35 R600_Reg32:%vreg5
%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
%vreg10<def,tied1> = INSERT_SUBREG %vreg36<tied0>, %vreg37<kill>, sel_w; R600_Reg128:%vreg10,%vreg36 R600_Reg32:%vreg37
%vreg9<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg9 R600_Reg128:%vreg6
%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Machine code sinking ***:
# Machine code for function main: SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%vreg17<def> = COPY %T1_W; R600_TReg32:%vreg17
%vreg16<def> = COPY %T1_Z; R600_TReg32:%vreg16
%vreg15<def> = COPY %T1_Y; R600_TReg32:%vreg15
%vreg14<def> = COPY %T1_X; R600_TReg32:%vreg14
%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
%vreg20<def> = IMPLICIT_DEF; R600_Reg128:%vreg20
%vreg19<def,tied1> = INSERT_SUBREG %vreg20<tied0>, %vreg14, sel_x; R600_Reg128:%vreg19,%vreg20 R600_TReg32:%vreg14
%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
%vreg22<def> = IMPLICIT_DEF; R600_Reg128:%vreg22
%vreg21<def,tied1> = INSERT_SUBREG %vreg22<tied0>, %vreg18<kill>, sel_x; R600_Reg128:%vreg21,%vreg22 R600_Reg32:%vreg18
%vreg23<def,tied1> = INSERT_SUBREG %vreg19<tied0>, %vreg15, sel_y; R600_Reg128:%vreg23,%vreg19 R600_TReg32:%vreg15
%vreg24<def,tied1> = INSERT_SUBREG %vreg21<tied0>, %vreg2, sel_y; R600_Reg128:%vreg24,%vreg21 R600_Reg32:%vreg2
%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
%vreg26<def,tied1> = INSERT_SUBREG %vreg23<tied0>, %vreg16, sel_z; R600_Reg128:%vreg26,%vreg23 R600_TReg32:%vreg16
%vreg27<def,tied1> = INSERT_SUBREG %vreg24<tied0>, %vreg25<kill>, sel_z; R600_Reg128:%vreg27,%vreg24 R600_Reg32:%vreg25
%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
%vreg3<def,tied1> = INSERT_SUBREG %vreg27<tied0>, %vreg28<kill>, sel_w; R600_Reg128:%vreg3,%vreg27 R600_Reg32:%vreg28
%vreg1<def,tied1> = INSERT_SUBREG %vreg26<tied0>, %vreg17, sel_w; R600_Reg128:%vreg1,%vreg26 R600_TReg32:%vreg17
%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
%vreg33<def> = IMPLICIT_DEF; R600_Reg128:%vreg33
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
%vreg5<def> = PHI %vreg2, <BB#0>, %vreg9, <BB#3>; R600_Reg32:%vreg5,%vreg2,%vreg9
%vreg6<def> = PHI %vreg3, <BB#0>, %vreg10, <BB#3>; R600_Reg128:%vreg6,%vreg3,%vreg10
%vreg7<def> = PHI %vreg13, <BB#0>, %vreg11, <BB#3>; R600_Reg32:%vreg7,%vreg13,%vreg11
%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
%vreg30<def> = COPY %vreg29; GPRI32:%vreg30 R600_Reg32:%vreg29
%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; GPRI32:%vreg30
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
JUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
%T2_X<def> = COPY %vreg39; R600_Reg32:%vreg39
%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
%T2_Y<def> = COPY %vreg40; R600_Reg32:%vreg40
%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
%T2_Z<def> = COPY %vreg41; R600_Reg32:%vreg41
%vreg42<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg42 R600_Reg128:%vreg6
%T2_W<def> = COPY %vreg42; R600_Reg32:%vreg42
%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
%T1_X<def> = COPY %vreg43; R600_Reg32:%vreg43
%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
%T1_Y<def> = COPY %vreg44; R600_Reg32:%vreg44
%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
%T1_Z<def> = COPY %vreg45; R600_Reg32:%vreg45
%vreg46<def> = COPY %vreg1:sel_w; R600_Reg32:%vreg46 R600_Reg128:%vreg1
%T1_W<def> = COPY %vreg46; R600_Reg32:%vreg46
RETURN

BB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
%vreg32<def,tied1> = INSERT_SUBREG %vreg33<tied0>, %vreg31<kill>, sel_x; R600_Reg128:%vreg32,%vreg33 R600_Reg32:%vreg31
%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
%vreg35<def,tied1> = INSERT_SUBREG %vreg32<tied0>, %vreg34<kill>, sel_y; R600_Reg128:%vreg35,%vreg32 R600_Reg32:%vreg34
%vreg36<def,tied1> = INSERT_SUBREG %vreg35<tied0>, %vreg5, sel_z; R600_Reg128:%vreg36,%vreg35 R600_Reg32:%vreg5
%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
%vreg10<def,tied1> = INSERT_SUBREG %vreg36<tied0>, %vreg37<kill>, sel_w; R600_Reg128:%vreg10,%vreg36 R600_Reg32:%vreg37
%vreg9<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg9 R600_Reg128:%vreg6
%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Peephole Optimizations ***:
# Machine code for function main: SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%vreg17<def> = COPY %T1_W; R600_TReg32:%vreg17
%vreg16<def> = COPY %T1_Z; R600_TReg32:%vreg16
%vreg15<def> = COPY %T1_Y; R600_TReg32:%vreg15
%vreg14<def> = COPY %T1_X; R600_TReg32:%vreg14
%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
%vreg20<def> = IMPLICIT_DEF; R600_Reg128:%vreg20
%vreg19<def,tied1> = INSERT_SUBREG %vreg20<tied0>, %vreg14, sel_x; R600_Reg128:%vreg19,%vreg20 R600_TReg32:%vreg14
%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
%vreg22<def> = IMPLICIT_DEF; R600_Reg128:%vreg22
%vreg21<def,tied1> = INSERT_SUBREG %vreg22<tied0>, %vreg18<kill>, sel_x; R600_Reg128:%vreg21,%vreg22 R600_Reg32:%vreg18
%vreg23<def,tied1> = INSERT_SUBREG %vreg19<tied0>, %vreg15, sel_y; R600_Reg128:%vreg23,%vreg19 R600_TReg32:%vreg15
%vreg24<def,tied1> = INSERT_SUBREG %vreg21<tied0>, %vreg2, sel_y; R600_Reg128:%vreg24,%vreg21 R600_Reg32:%vreg2
%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
%vreg26<def,tied1> = INSERT_SUBREG %vreg23<tied0>, %vreg16, sel_z; R600_Reg128:%vreg26,%vreg23 R600_TReg32:%vreg16
%vreg27<def,tied1> = INSERT_SUBREG %vreg24<tied0>, %vreg25<kill>, sel_z; R600_Reg128:%vreg27,%vreg24 R600_Reg32:%vreg25
%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
%vreg3<def,tied1> = INSERT_SUBREG %vreg27<tied0>, %vreg28<kill>, sel_w; R600_Reg128:%vreg3,%vreg27 R600_Reg32:%vreg28
%vreg1<def,tied1> = INSERT_SUBREG %vreg26<tied0>, %vreg17, sel_w; R600_Reg128:%vreg1,%vreg26 R600_TReg32:%vreg17
%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
%vreg33<def> = IMPLICIT_DEF; R600_Reg128:%vreg33
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
%vreg5<def> = PHI %vreg2, <BB#0>, %vreg9, <BB#3>; R600_Reg32:%vreg5,%vreg2,%vreg9
%vreg6<def> = PHI %vreg3, <BB#0>, %vreg10, <BB#3>; R600_Reg128:%vreg6,%vreg3,%vreg10
%vreg7<def> = PHI %vreg13, <BB#0>, %vreg11, <BB#3>; R600_Reg32:%vreg7,%vreg13,%vreg11
%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
%vreg30<def> = COPY %vreg29; GPRI32:%vreg30 R600_Reg32:%vreg29
%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; GPRI32:%vreg30
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
JUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
%T2_X<def> = COPY %vreg39; R600_Reg32:%vreg39
%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
%T2_Y<def> = COPY %vreg40; R600_Reg32:%vreg40
%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
%T2_Z<def> = COPY %vreg41; R600_Reg32:%vreg41
%vreg42<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg42 R600_Reg128:%vreg6
%T2_W<def> = COPY %vreg42; R600_Reg32:%vreg42
%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
%T1_X<def> = COPY %vreg43; R600_Reg32:%vreg43
%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
%T1_Y<def> = COPY %vreg44; R600_Reg32:%vreg44
%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
%T1_Z<def> = COPY %vreg45; R600_Reg32:%vreg45
%vreg46<def> = COPY %vreg1:sel_w; R600_Reg32:%vreg46 R600_Reg128:%vreg1
%T1_W<def> = COPY %vreg46; R600_Reg32:%vreg46
RETURN

BB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
%vreg32<def,tied1> = INSERT_SUBREG %vreg33<tied0>, %vreg31<kill>, sel_x; R600_Reg128:%vreg32,%vreg33 R600_Reg32:%vreg31
%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
%vreg35<def,tied1> = INSERT_SUBREG %vreg32<tied0>, %vreg34<kill>, sel_y; R600_Reg128:%vreg35,%vreg32 R600_Reg32:%vreg34
%vreg36<def,tied1> = INSERT_SUBREG %vreg35<tied0>, %vreg5, sel_z; R600_Reg128:%vreg36,%vreg35 R600_Reg32:%vreg5
%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
%vreg10<def,tied1> = INSERT_SUBREG %vreg36<tied0>, %vreg37<kill>, sel_w; R600_Reg128:%vreg10,%vreg36 R600_Reg32:%vreg37
%vreg9<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg9 R600_Reg128:%vreg6
%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Process Implicit Definitions ***:
# Machine code for function main: SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%vreg17<def> = COPY %T1_W; R600_TReg32:%vreg17
%vreg16<def> = COPY %T1_Z; R600_TReg32:%vreg16
%vreg15<def> = COPY %T1_Y; R600_TReg32:%vreg15
%vreg14<def> = COPY %T1_X; R600_TReg32:%vreg14
%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
%vreg20<def> = IMPLICIT_DEF; R600_Reg128:%vreg20
%vreg19<def,tied1> = INSERT_SUBREG %vreg20<tied0>, %vreg14, sel_x; R600_Reg128:%vreg19,%vreg20 R600_TReg32:%vreg14
%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
%vreg22<def> = IMPLICIT_DEF; R600_Reg128:%vreg22
%vreg21<def,tied1> = INSERT_SUBREG %vreg22<tied0>, %vreg18<kill>, sel_x; R600_Reg128:%vreg21,%vreg22 R600_Reg32:%vreg18
%vreg23<def,tied1> = INSERT_SUBREG %vreg19<tied0>, %vreg15, sel_y; R600_Reg128:%vreg23,%vreg19 R600_TReg32:%vreg15
%vreg24<def,tied1> = INSERT_SUBREG %vreg21<tied0>, %vreg2, sel_y; R600_Reg128:%vreg24,%vreg21 R600_Reg32:%vreg2
%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
%vreg26<def,tied1> = INSERT_SUBREG %vreg23<tied0>, %vreg16, sel_z; R600_Reg128:%vreg26,%vreg23 R600_TReg32:%vreg16
%vreg27<def,tied1> = INSERT_SUBREG %vreg24<tied0>, %vreg25<kill>, sel_z; R600_Reg128:%vreg27,%vreg24 R600_Reg32:%vreg25
%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
%vreg3<def,tied1> = INSERT_SUBREG %vreg27<tied0>, %vreg28<kill>, sel_w; R600_Reg128:%vreg3,%vreg27 R600_Reg32:%vreg28
%vreg1<def,tied1> = INSERT_SUBREG %vreg26<tied0>, %vreg17, sel_w; R600_Reg128:%vreg1,%vreg26 R600_TReg32:%vreg17
%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
%vreg33<def> = IMPLICIT_DEF; R600_Reg128:%vreg33
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
%vreg5<def> = PHI %vreg2, <BB#0>, %vreg9, <BB#3>; R600_Reg32:%vreg5,%vreg2,%vreg9
%vreg6<def> = PHI %vreg3, <BB#0>, %vreg10, <BB#3>; R600_Reg128:%vreg6,%vreg3,%vreg10
%vreg7<def> = PHI %vreg13, <BB#0>, %vreg11, <BB#3>; R600_Reg32:%vreg7,%vreg13,%vreg11
%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
%vreg30<def> = COPY %vreg29; R600_Reg32:%vreg30,%vreg29
%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; R600_Reg32:%vreg30
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
JUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
%T2_X<def> = COPY %vreg39; R600_Reg32:%vreg39
%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
%T2_Y<def> = COPY %vreg40; R600_Reg32:%vreg40
%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
%T2_Z<def> = COPY %vreg41; R600_Reg32:%vreg41
%vreg42<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg42 R600_Reg128:%vreg6
%T2_W<def> = COPY %vreg42; R600_Reg32:%vreg42
%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
%T1_X<def> = COPY %vreg43; R600_Reg32:%vreg43
%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
%T1_Y<def> = COPY %vreg44; R600_Reg32:%vreg44
%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
%T1_Z<def> = COPY %vreg45; R600_Reg32:%vreg45
%vreg46<def> = COPY %vreg1:sel_w; R600_Reg32:%vreg46 R600_Reg128:%vreg1
%T1_W<def> = COPY %vreg46; R600_Reg32:%vreg46
RETURN

BB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
%vreg32<def,tied1> = INSERT_SUBREG %vreg33<tied0>, %vreg31<kill>, sel_x; R600_Reg128:%vreg32,%vreg33 R600_Reg32:%vreg31
%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
%vreg35<def,tied1> = INSERT_SUBREG %vreg32<tied0>, %vreg34<kill>, sel_y; R600_Reg128:%vreg35,%vreg32 R600_Reg32:%vreg34
%vreg36<def,tied1> = INSERT_SUBREG %vreg35<tied0>, %vreg5, sel_z; R600_Reg128:%vreg36,%vreg35 R600_Reg32:%vreg5
%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
%vreg10<def,tied1> = INSERT_SUBREG %vreg36<tied0>, %vreg37<kill>, sel_w; R600_Reg128:%vreg10,%vreg36 R600_Reg32:%vreg37
%vreg9<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg9 R600_Reg128:%vreg6
%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Remove unreachable machine basic blocks ***:
# Machine code for function main: SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%vreg17<def> = COPY %T1_W; R600_TReg32:%vreg17
%vreg16<def> = COPY %T1_Z; R600_TReg32:%vreg16
%vreg15<def> = COPY %T1_Y; R600_TReg32:%vreg15
%vreg14<def> = COPY %T1_X; R600_TReg32:%vreg14
%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
%vreg19<def,tied1> = INSERT_SUBREG %vreg20<undef,tied0>, %vreg14, sel_x; R600_Reg128:%vreg19,%vreg20 R600_TReg32:%vreg14
%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
%vreg21<def,tied1> = INSERT_SUBREG %vreg22<undef,tied0>, %vreg18<kill>, sel_x; R600_Reg128:%vreg21,%vreg22 R600_Reg32:%vreg18
%vreg23<def,tied1> = INSERT_SUBREG %vreg19<tied0>, %vreg15, sel_y; R600_Reg128:%vreg23,%vreg19 R600_TReg32:%vreg15
%vreg24<def,tied1> = INSERT_SUBREG %vreg21<tied0>, %vreg2, sel_y; R600_Reg128:%vreg24,%vreg21 R600_Reg32:%vreg2
%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
%vreg26<def,tied1> = INSERT_SUBREG %vreg23<tied0>, %vreg16, sel_z; R600_Reg128:%vreg26,%vreg23 R600_TReg32:%vreg16
%vreg27<def,tied1> = INSERT_SUBREG %vreg24<tied0>, %vreg25<kill>, sel_z; R600_Reg128:%vreg27,%vreg24 R600_Reg32:%vreg25
%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
%vreg3<def,tied1> = INSERT_SUBREG %vreg27<tied0>, %vreg28<kill>, sel_w; R600_Reg128:%vreg3,%vreg27 R600_Reg32:%vreg28
%vreg1<def,tied1> = INSERT_SUBREG %vreg26<tied0>, %vreg17, sel_w; R600_Reg128:%vreg1,%vreg26 R600_TReg32:%vreg17
%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
%vreg5<def> = PHI %vreg2, <BB#0>, %vreg9, <BB#3>; R600_Reg32:%vreg5,%vreg2,%vreg9
%vreg6<def> = PHI %vreg3, <BB#0>, %vreg10, <BB#3>; R600_Reg128:%vreg6,%vreg3,%vreg10
%vreg7<def> = PHI %vreg13, <BB#0>, %vreg11, <BB#3>; R600_Reg32:%vreg7,%vreg13,%vreg11
%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
%vreg30<def> = COPY %vreg29; R600_Reg32:%vreg30,%vreg29
%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; R600_Reg32:%vreg30
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
JUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
%T2_X<def> = COPY %vreg39; R600_Reg32:%vreg39
%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
%T2_Y<def> = COPY %vreg40; R600_Reg32:%vreg40
%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
%T2_Z<def> = COPY %vreg41; R600_Reg32:%vreg41
%vreg42<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg42 R600_Reg128:%vreg6
%T2_W<def> = COPY %vreg42; R600_Reg32:%vreg42
%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
%T1_X<def> = COPY %vreg43; R600_Reg32:%vreg43
%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
%T1_Y<def> = COPY %vreg44; R600_Reg32:%vreg44
%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
%T1_Z<def> = COPY %vreg45; R600_Reg32:%vreg45
%vreg46<def> = COPY %vreg1:sel_w; R600_Reg32:%vreg46 R600_Reg128:%vreg1
%T1_W<def> = COPY %vreg46; R600_Reg32:%vreg46
RETURN

BB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
%vreg32<def,tied1> = INSERT_SUBREG %vreg33<undef,tied0>, %vreg31<kill>, sel_x; R600_Reg128:%vreg32,%vreg33 R600_Reg32:%vreg31
%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
%vreg35<def,tied1> = INSERT_SUBREG %vreg32<tied0>, %vreg34<kill>, sel_y; R600_Reg128:%vreg35,%vreg32 R600_Reg32:%vreg34
%vreg36<def,tied1> = INSERT_SUBREG %vreg35<tied0>, %vreg5, sel_z; R600_Reg128:%vreg36,%vreg35 R600_Reg32:%vreg5
%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
%vreg10<def,tied1> = INSERT_SUBREG %vreg36<tied0>, %vreg37<kill>, sel_w; R600_Reg128:%vreg10,%vreg36 R600_Reg32:%vreg37
%vreg9<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg9 R600_Reg128:%vreg6
%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Live Variable Analysis ***:
# Machine code for function main: SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%vreg17<def> = COPY %T1_W; R600_TReg32:%vreg17
%vreg16<def> = COPY %T1_Z; R600_TReg32:%vreg16
%vreg15<def> = COPY %T1_Y; R600_TReg32:%vreg15
%vreg14<def> = COPY %T1_X; R600_TReg32:%vreg14
%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
%vreg19<def,tied1> = INSERT_SUBREG %vreg20<undef,tied0>, %vreg14, sel_x; R600_Reg128:%vreg19,%vreg20 R600_TReg32:%vreg14
%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
%vreg21<def,tied1> = INSERT_SUBREG %vreg22<undef,tied0>, %vreg18<kill>, sel_x; R600_Reg128:%vreg21,%vreg22 R600_Reg32:%vreg18
%vreg23<def,tied1> = INSERT_SUBREG %vreg19<tied0>, %vreg15, sel_y; R600_Reg128:%vreg23,%vreg19 R600_TReg32:%vreg15
%vreg24<def,tied1> = INSERT_SUBREG %vreg21<tied0>, %vreg2, sel_y; R600_Reg128:%vreg24,%vreg21 R600_Reg32:%vreg2
%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
%vreg26<def,tied1> = INSERT_SUBREG %vreg23<tied0>, %vreg16, sel_z; R600_Reg128:%vreg26,%vreg23 R600_TReg32:%vreg16
%vreg27<def,tied1> = INSERT_SUBREG %vreg24<tied0>, %vreg25<kill>, sel_z; R600_Reg128:%vreg27,%vreg24 R600_Reg32:%vreg25
%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
%vreg3<def,tied1> = INSERT_SUBREG %vreg27<tied0>, %vreg28<kill>, sel_w; R600_Reg128:%vreg3,%vreg27 R600_Reg32:%vreg28
%vreg1<def,tied1> = INSERT_SUBREG %vreg26<tied0>, %vreg17, sel_w; R600_Reg128:%vreg1,%vreg26 R600_TReg32:%vreg17
%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
%vreg5<def> = PHI %vreg2, <BB#0>, %vreg9, <BB#3>; R600_Reg32:%vreg5,%vreg2,%vreg9
%vreg6<def> = PHI %vreg3, <BB#0>, %vreg10, <BB#3>; R600_Reg128:%vreg6,%vreg3,%vreg10
%vreg7<def> = PHI %vreg13, <BB#0>, %vreg11, <BB#3>; R600_Reg32:%vreg7,%vreg13,%vreg11
%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
%vreg30<def> = COPY %vreg29; R600_Reg32:%vreg30,%vreg29
%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; R600_Reg32:%vreg30
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
JUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
%T2_X<def> = COPY %vreg39; R600_Reg32:%vreg39
%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
%T2_Y<def> = COPY %vreg40; R600_Reg32:%vreg40
%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
%T2_Z<def> = COPY %vreg41; R600_Reg32:%vreg41
%vreg42<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg42 R600_Reg128:%vreg6
%T2_W<def> = COPY %vreg42; R600_Reg32:%vreg42
%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
%T1_X<def> = COPY %vreg43; R600_Reg32:%vreg43
%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
%T1_Y<def> = COPY %vreg44; R600_Reg32:%vreg44
%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
%T1_Z<def> = COPY %vreg45; R600_Reg32:%vreg45
%vreg46<def> = COPY %vreg1:sel_w; R600_Reg32:%vreg46 R600_Reg128:%vreg1
%T1_W<def> = COPY %vreg46; R600_Reg32:%vreg46
RETURN

BB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
%vreg32<def,tied1> = INSERT_SUBREG %vreg33<undef,tied0>, %vreg31<kill>, sel_x; R600_Reg128:%vreg32,%vreg33 R600_Reg32:%vreg31
%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
%vreg35<def,tied1> = INSERT_SUBREG %vreg32<tied0>, %vreg34<kill>, sel_y; R600_Reg128:%vreg35,%vreg32 R600_Reg32:%vreg34
%vreg36<def,tied1> = INSERT_SUBREG %vreg35<tied0>, %vreg5, sel_z; R600_Reg128:%vreg36,%vreg35 R600_Reg32:%vreg5
%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
%vreg10<def,tied1> = INSERT_SUBREG %vreg36<tied0>, %vreg37<kill>, sel_w; R600_Reg128:%vreg10,%vreg36 R600_Reg32:%vreg37
%vreg9<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg9 R600_Reg128:%vreg6
%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Eliminate PHI nodes for register allocation ***:
# Machine code for function main: SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%vreg17<def> = COPY %T1_W<kill>; R600_TReg32:%vreg17
%vreg16<def> = COPY %T1_Z<kill>; R600_TReg32:%vreg16
%vreg15<def> = COPY %T1_Y<kill>; R600_TReg32:%vreg15
%vreg14<def> = COPY %T1_X<kill>; R600_TReg32:%vreg14
%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
%vreg19<def,tied1> = INSERT_SUBREG %vreg20<undef,tied0>, %vreg14<kill>, sel_x; R600_Reg128:%vreg19,%vreg20 R600_TReg32:%vreg14
%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
%vreg21<def,tied1> = INSERT_SUBREG %vreg22<undef,tied0>, %vreg18<kill>, sel_x; R600_Reg128:%vreg21,%vreg22 R600_Reg32:%vreg18
%vreg23<def,tied1> = INSERT_SUBREG %vreg19<kill,tied0>, %vreg15<kill>, sel_y; R600_Reg128:%vreg23,%vreg19 R600_TReg32:%vreg15
%vreg24<def,tied1> = INSERT_SUBREG %vreg21<kill,tied0>, %vreg2, sel_y; R600_Reg128:%vreg24,%vreg21 R600_Reg32:%vreg2
%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
%vreg26<def,tied1> = INSERT_SUBREG %vreg23<kill,tied0>, %vreg16<kill>, sel_z; R600_Reg128:%vreg26,%vreg23 R600_TReg32:%vreg16
%vreg27<def,tied1> = INSERT_SUBREG %vreg24<kill,tied0>, %vreg25<kill>, sel_z; R600_Reg128:%vreg27,%vreg24 R600_Reg32:%vreg25
%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
%vreg3<def,tied1> = INSERT_SUBREG %vreg27<kill,tied0>, %vreg28<kill>, sel_w; R600_Reg128:%vreg3,%vreg27 R600_Reg32:%vreg28
%vreg1<def,tied1> = INSERT_SUBREG %vreg26<kill,tied0>, %vreg17<kill>, sel_w; R600_Reg128:%vreg1,%vreg26 R600_TReg32:%vreg17
%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
%vreg5<def> = PHI %vreg2, <BB#0>, %vreg9, <BB#3>; R600_Reg32:%vreg5,%vreg2,%vreg9
%vreg6<def> = PHI %vreg3, <BB#0>, %vreg10, <BB#3>; R600_Reg128:%vreg6,%vreg3,%vreg10
%vreg7<def> = PHI %vreg13, <BB#0>, %vreg11, <BB#3>; R600_Reg32:%vreg7,%vreg13,%vreg11
%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
%vreg30<def> = COPY %vreg29<kill>; R600_Reg32:%vreg30,%vreg29
%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; R600_Reg32:%vreg30
JUMP <BB#3>, pred:%PREDICATE_BIT
JUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
%T2_X<def> = COPY %vreg39<kill>; R600_Reg32:%vreg39
%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
%T2_Y<def> = COPY %vreg40<kill>; R600_Reg32:%vreg40
%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
%T2_Z<def> = COPY %vreg41<kill>; R600_Reg32:%vreg41
%vreg42<def> = COPY %vreg6:sel_w<kill>; R600_Reg32:%vreg42 R600_Reg128:%vreg6
%T2_W<def> = COPY %vreg42<kill>; R600_Reg32:%vreg42
%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
%T1_X<def> = COPY %vreg43<kill>; R600_Reg32:%vreg43
%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
%T1_Y<def> = COPY %vreg44<kill>; R600_Reg32:%vreg44
%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
%T1_Z<def> = COPY %vreg45<kill>; R600_Reg32:%vreg45
%vreg46<def> = COPY %vreg1:sel_w<kill>; R600_Reg32:%vreg46 R600_Reg128:%vreg1
%T1_W<def> = COPY %vreg46<kill>; R600_Reg32:%vreg46
RETURN %T1_W<imp-use,kill>, %T1_Z<imp-use,kill>, %T1_Y<imp-use,kill>, %T1_X<imp-use,kill>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill>

BB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
%vreg32<def,tied1> = INSERT_SUBREG %vreg33<undef,tied0>, %vreg31<kill>, sel_x; R600_Reg128:%vreg32,%vreg33 R600_Reg32:%vreg31
%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
%vreg35<def,tied1> = INSERT_SUBREG %vreg32<kill,tied0>, %vreg34<kill>, sel_y; R600_Reg128:%vreg35,%vreg32 R600_Reg32:%vreg34
%vreg36<def,tied1> = INSERT_SUBREG %vreg35<kill,tied0>, %vreg5<kill>, sel_z; R600_Reg128:%vreg36,%vreg35 R600_Reg32:%vreg5
%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
%vreg10<def,tied1> = INSERT_SUBREG %vreg36<kill,tied0>, %vreg37<kill>, sel_w; R600_Reg128:%vreg10,%vreg36 R600_Reg32:%vreg37
%vreg9<def> = COPY %vreg6:sel_z<kill>; R600_Reg32:%vreg9 R600_Reg128:%vreg6
%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Two-Address instruction pass ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%vreg17<def> = COPY %T1_W<kill>; R600_TReg32:%vreg17
%vreg16<def> = COPY %T1_Z<kill>; R600_TReg32:%vreg16
%vreg15<def> = COPY %T1_Y<kill>; R600_TReg32:%vreg15
%vreg14<def> = COPY %T1_X<kill>; R600_TReg32:%vreg14
%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
%vreg19<def,tied1> = INSERT_SUBREG %vreg20<undef,tied0>, %vreg14<kill>, sel_x; R600_Reg128:%vreg19,%vreg20 R600_TReg32:%vreg14
%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
%vreg21<def,tied1> = INSERT_SUBREG %vreg22<undef,tied0>, %vreg18<kill>, sel_x; R600_Reg128:%vreg21,%vreg22 R600_Reg32:%vreg18
%vreg23<def,tied1> = INSERT_SUBREG %vreg19<kill,tied0>, %vreg15<kill>, sel_y; R600_Reg128:%vreg23,%vreg19 R600_TReg32:%vreg15
%vreg24<def,tied1> = INSERT_SUBREG %vreg21<kill,tied0>, %vreg2, sel_y; R600_Reg128:%vreg24,%vreg21 R600_Reg32:%vreg2
%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
%vreg26<def,tied1> = INSERT_SUBREG %vreg23<kill,tied0>, %vreg16<kill>, sel_z; R600_Reg128:%vreg26,%vreg23 R600_TReg32:%vreg16
%vreg27<def,tied1> = INSERT_SUBREG %vreg24<kill,tied0>, %vreg25<kill>, sel_z; R600_Reg128:%vreg27,%vreg24 R600_Reg32:%vreg25
%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
%vreg3<def,tied1> = INSERT_SUBREG %vreg27<kill,tied0>, %vreg28<kill>, sel_w; R600_Reg128:%vreg3,%vreg27 R600_Reg32:%vreg28
%vreg1<def,tied1> = INSERT_SUBREG %vreg26<kill,tied0>, %vreg17<kill>, sel_w; R600_Reg128:%vreg1,%vreg26 R600_TReg32:%vreg17
%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
%vreg47<def> = COPY %vreg2<kill>; R600_Reg32:%vreg47,%vreg2
%vreg48<def> = COPY %vreg3<kill>; R600_Reg128:%vreg48,%vreg3
%vreg49<def> = COPY %vreg13<kill>; R600_Reg32:%vreg49,%vreg13
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
%vreg5<def> = COPY %vreg47<kill>; R600_Reg32:%vreg5,%vreg47
%vreg6<def> = COPY %vreg48<kill>; R600_Reg128:%vreg6,%vreg48
%vreg7<def> = COPY %vreg49<kill>; R600_Reg32:%vreg7,%vreg49
%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
%vreg30<def> = COPY %vreg29<kill>; R600_Reg32:%vreg30,%vreg29
%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; R600_Reg32:%vreg30
JUMP <BB#3>, pred:%PREDICATE_BIT
JUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
%T2_X<def> = COPY %vreg39<kill>; R600_Reg32:%vreg39
%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
%T2_Y<def> = COPY %vreg40<kill>; R600_Reg32:%vreg40
%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
%T2_Z<def> = COPY %vreg41<kill>; R600_Reg32:%vreg41
%vreg42<def> = COPY %vreg6:sel_w<kill>; R600_Reg32:%vreg42 R600_Reg128:%vreg6
%T2_W<def> = COPY %vreg42<kill>; R600_Reg32:%vreg42
%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
%T1_X<def> = COPY %vreg43<kill>; R600_Reg32:%vreg43
%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
%T1_Y<def> = COPY %vreg44<kill>; R600_Reg32:%vreg44
%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
%T1_Z<def> = COPY %vreg45<kill>; R600_Reg32:%vreg45
%vreg46<def> = COPY %vreg1:sel_w<kill>; R600_Reg32:%vreg46 R600_Reg128:%vreg1
%T1_W<def> = COPY %vreg46<kill>; R600_Reg32:%vreg46
RETURN %T1_W<imp-use,kill>, %T1_Z<imp-use,kill>, %T1_Y<imp-use,kill>, %T1_X<imp-use,kill>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill>

BB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
%vreg32<def,tied1> = INSERT_SUBREG %vreg33<undef,tied0>, %vreg31<kill>, sel_x; R600_Reg128:%vreg32,%vreg33 R600_Reg32:%vreg31
%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
%vreg35<def,tied1> = INSERT_SUBREG %vreg32<kill,tied0>, %vreg34<kill>, sel_y; R600_Reg128:%vreg35,%vreg32 R600_Reg32:%vreg34
%vreg36<def,tied1> = INSERT_SUBREG %vreg35<kill,tied0>, %vreg5<kill>, sel_z; R600_Reg128:%vreg36,%vreg35 R600_Reg32:%vreg5
%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
%vreg10<def,tied1> = INSERT_SUBREG %vreg36<kill,tied0>, %vreg37<kill>, sel_w; R600_Reg128:%vreg10,%vreg36 R600_Reg32:%vreg37
%vreg9<def> = COPY %vreg6:sel_z<kill>; R600_Reg32:%vreg9 R600_Reg128:%vreg6
%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
%vreg47<def> = COPY %vreg9<kill>; R600_Reg32:%vreg47,%vreg9
%vreg48<def> = COPY %vreg10<kill>; R600_Reg128:%vreg48,%vreg10
%vreg49<def> = COPY %vreg11<kill>; R600_Reg32:%vreg49,%vreg11
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Slot index numbering ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%vreg17<def> = COPY %T1_W<kill>; R600_TReg32:%vreg17
%vreg16<def> = COPY %T1_Z<kill>; R600_TReg32:%vreg16
%vreg15<def> = COPY %T1_Y<kill>; R600_TReg32:%vreg15
%vreg14<def> = COPY %T1_X<kill>; R600_TReg32:%vreg14
%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
%vreg19:sel_x<def,read-undef> = COPY %vreg14<kill>; R600_Reg128:%vreg19 R600_TReg32:%vreg14
%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
%vreg21:sel_x<def,read-undef> = COPY %vreg18<kill>; R600_Reg128:%vreg21 R600_Reg32:%vreg18
%vreg23<def> = COPY %vreg19<kill>; R600_Reg128:%vreg23,%vreg19
%vreg23:sel_y<def> = COPY %vreg15<kill>; R600_Reg128:%vreg23 R600_TReg32:%vreg15
%vreg24<def> = COPY %vreg21<kill>; R600_Reg128:%vreg24,%vreg21
%vreg24:sel_y<def> = COPY %vreg2; R600_Reg128:%vreg24 R600_Reg32:%vreg2
%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
%vreg26<def> = COPY %vreg23<kill>; R600_Reg128:%vreg26,%vreg23
%vreg26:sel_z<def> = COPY %vreg16<kill>; R600_Reg128:%vreg26 R600_TReg32:%vreg16
%vreg27<def> = COPY %vreg24<kill>; R600_Reg128:%vreg27,%vreg24
%vreg27:sel_z<def> = COPY %vreg25<kill>; R600_Reg128:%vreg27 R600_Reg32:%vreg25
%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
%vreg3<def> = COPY %vreg27<kill>; R600_Reg128:%vreg3,%vreg27
%vreg3:sel_w<def> = COPY %vreg28<kill>; R600_Reg128:%vreg3 R600_Reg32:%vreg28
%vreg1<def> = COPY %vreg26<kill>; R600_Reg128:%vreg1,%vreg26
%vreg1:sel_w<def> = COPY %vreg17<kill>; R600_Reg128:%vreg1 R600_TReg32:%vreg17
%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
%vreg47<def> = COPY %vreg2<kill>; R600_Reg32:%vreg47,%vreg2
%vreg48<def> = COPY %vreg3<kill>; R600_Reg128:%vreg48,%vreg3
%vreg49<def> = COPY %vreg13<kill>; R600_Reg32:%vreg49,%vreg13
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
%vreg5<def> = COPY %vreg47<kill>; R600_Reg32:%vreg5,%vreg47
%vreg6<def> = COPY %vreg48<kill>; R600_Reg128:%vreg6,%vreg48
%vreg7<def> = COPY %vreg49<kill>; R600_Reg32:%vreg7,%vreg49
%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
%vreg30<def> = COPY %vreg29<kill>; R600_Reg32:%vreg30,%vreg29
%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; R600_Reg32:%vreg30
JUMP <BB#3>, pred:%PREDICATE_BIT
JUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
%T2_X<def> = COPY %vreg39<kill>; R600_Reg32:%vreg39
%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
%T2_Y<def> = COPY %vreg40<kill>; R600_Reg32:%vreg40
%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
%T2_Z<def> = COPY %vreg41<kill>; R600_Reg32:%vreg41
%vreg42<def> = COPY %vreg6:sel_w<kill>; R600_Reg32:%vreg42 R600_Reg128:%vreg6
%T2_W<def> = COPY %vreg42<kill>; R600_Reg32:%vreg42
%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
%T1_X<def> = COPY %vreg43<kill>; R600_Reg32:%vreg43
%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
%T1_Y<def> = COPY %vreg44<kill>; R600_Reg32:%vreg44
%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
%T1_Z<def> = COPY %vreg45<kill>; R600_Reg32:%vreg45
%vreg46<def> = COPY %vreg1:sel_w<kill>; R600_Reg32:%vreg46 R600_Reg128:%vreg1
%T1_W<def> = COPY %vreg46<kill>; R600_Reg32:%vreg46
RETURN %T1_W<imp-use,kill>, %T1_Z<imp-use,kill>, %T1_Y<imp-use,kill>, %T1_X<imp-use,kill>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill>

BB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
%vreg32:sel_x<def,read-undef> = COPY %vreg31<kill>; R600_Reg128:%vreg32 R600_Reg32:%vreg31
%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
%vreg35<def> = COPY %vreg32<kill>; R600_Reg128:%vreg35,%vreg32
%vreg35:sel_y<def> = COPY %vreg34<kill>; R600_Reg128:%vreg35 R600_Reg32:%vreg34
%vreg36<def> = COPY %vreg35<kill>; R600_Reg128:%vreg36,%vreg35
%vreg36:sel_z<def> = COPY %vreg5<kill>; R600_Reg128:%vreg36 R600_Reg32:%vreg5
%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
%vreg10<def> = COPY %vreg36<kill>; R600_Reg128:%vreg10,%vreg36
%vreg10:sel_w<def> = COPY %vreg37<kill>; R600_Reg128:%vreg10 R600_Reg32:%vreg37
%vreg9<def> = COPY %vreg6:sel_z<kill>; R600_Reg32:%vreg9 R600_Reg128:%vreg6
%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
%vreg47<def> = COPY %vreg9<kill>; R600_Reg32:%vreg47,%vreg9
%vreg48<def> = COPY %vreg10<kill>; R600_Reg128:%vreg48,%vreg10
%vreg49<def> = COPY %vreg11<kill>; R600_Reg32:%vreg49,%vreg11
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Live Interval Analysis ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

0BBB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
16B%vreg17<def> = COPY %T1_W<kill>; R600_TReg32:%vreg17
32B%vreg16<def> = COPY %T1_Z<kill>; R600_TReg32:%vreg16
48B%vreg15<def> = COPY %T1_Y<kill>; R600_TReg32:%vreg15
64B%vreg14<def> = COPY %T1_X<kill>; R600_TReg32:%vreg14
80B%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
96B%vreg19:sel_x<def,read-undef> = COPY %vreg14<kill>; R600_Reg128:%vreg19 R600_TReg32:%vreg14
112B%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
128B%vreg21:sel_x<def,read-undef> = COPY %vreg18<kill>; R600_Reg128:%vreg21 R600_Reg32:%vreg18
144B%vreg23<def> = COPY %vreg19<kill>; R600_Reg128:%vreg23,%vreg19
160B%vreg23:sel_y<def> = COPY %vreg15<kill>; R600_Reg128:%vreg23 R600_TReg32:%vreg15
176B%vreg24<def> = COPY %vreg21<kill>; R600_Reg128:%vreg24,%vreg21
192B%vreg24:sel_y<def> = COPY %vreg2; R600_Reg128:%vreg24 R600_Reg32:%vreg2
208B%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
224B%vreg26<def> = COPY %vreg23<kill>; R600_Reg128:%vreg26,%vreg23
240B%vreg26:sel_z<def> = COPY %vreg16<kill>; R600_Reg128:%vreg26 R600_TReg32:%vreg16
256B%vreg27<def> = COPY %vreg24<kill>; R600_Reg128:%vreg27,%vreg24
272B%vreg27:sel_z<def> = COPY %vreg25<kill>; R600_Reg128:%vreg27 R600_Reg32:%vreg25
288B%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
304B%vreg3<def> = COPY %vreg27<kill>; R600_Reg128:%vreg3,%vreg27
320B%vreg3:sel_w<def> = COPY %vreg28<kill>; R600_Reg128:%vreg3 R600_Reg32:%vreg28
336B%vreg1<def> = COPY %vreg26<kill>; R600_Reg128:%vreg1,%vreg26
352B%vreg1:sel_w<def> = COPY %vreg17<kill>; R600_Reg128:%vreg1 R600_TReg32:%vreg17
368B%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
384B%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
400B%vreg47<def> = COPY %vreg2<kill>; R600_Reg32:%vreg47,%vreg2
416B%vreg48<def> = COPY %vreg3<kill>; R600_Reg128:%vreg48,%vreg3
432B%vreg49<def> = COPY %vreg13<kill>; R600_Reg32:%vreg49,%vreg13
    Successors according to CFG: BB#1

448BBB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
464B%vreg5<def> = COPY %vreg47<kill>; R600_Reg32:%vreg5,%vreg47
480B%vreg6<def> = COPY %vreg48<kill>; R600_Reg128:%vreg6,%vreg48
496B%vreg7<def> = COPY %vreg49<kill>; R600_Reg32:%vreg7,%vreg49
512B%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
528B%vreg30<def> = COPY %vreg29<kill>; R600_Reg32:%vreg30,%vreg29
544B%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; R600_Reg32:%vreg30
560BJUMP <BB#3>, pred:%PREDICATE_BIT
576BJUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

592BBB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
608B%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
624B%T2_X<def> = COPY %vreg39<kill>; R600_Reg32:%vreg39
640B%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
656B%T2_Y<def> = COPY %vreg40<kill>; R600_Reg32:%vreg40
672B%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
688B%T2_Z<def> = COPY %vreg41<kill>; R600_Reg32:%vreg41
704B%vreg42<def> = COPY %vreg6:sel_w<kill>; R600_Reg32:%vreg42 R600_Reg128:%vreg6
720B%T2_W<def> = COPY %vreg42<kill>; R600_Reg32:%vreg42
736B%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
752B%T1_X<def> = COPY %vreg43<kill>; R600_Reg32:%vreg43
768B%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
784B%T1_Y<def> = COPY %vreg44<kill>; R600_Reg32:%vreg44
800B%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
816B%T1_Z<def> = COPY %vreg45<kill>; R600_Reg32:%vreg45
832B%vreg46<def> = COPY %vreg1:sel_w<kill>; R600_Reg32:%vreg46 R600_Reg128:%vreg1
848B%T1_W<def> = COPY %vreg46<kill>; R600_Reg32:%vreg46
864BRETURN %T1_W<imp-use,kill>, %T1_Z<imp-use,kill>, %T1_Y<imp-use,kill>, %T1_X<imp-use,kill>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill>

880BBB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
896B%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
912B%vreg32:sel_x<def,read-undef> = COPY %vreg31<kill>; R600_Reg128:%vreg32 R600_Reg32:%vreg31
928B%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
944B%vreg35<def> = COPY %vreg32<kill>; R600_Reg128:%vreg35,%vreg32
960B%vreg35:sel_y<def> = COPY %vreg34<kill>; R600_Reg128:%vreg35 R600_Reg32:%vreg34
976B%vreg36<def> = COPY %vreg35<kill>; R600_Reg128:%vreg36,%vreg35
992B%vreg36:sel_z<def> = COPY %vreg5<kill>; R600_Reg128:%vreg36 R600_Reg32:%vreg5
1008B%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
1024B%vreg10<def> = COPY %vreg36<kill>; R600_Reg128:%vreg10,%vreg36
1040B%vreg10:sel_w<def> = COPY %vreg37<kill>; R600_Reg128:%vreg10 R600_Reg32:%vreg37
1056B%vreg9<def> = COPY %vreg6:sel_z<kill>; R600_Reg32:%vreg9 R600_Reg128:%vreg6
1072B%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
1088B%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
1104B%vreg47<def> = COPY %vreg9<kill>; R600_Reg32:%vreg47,%vreg9
1120B%vreg48<def> = COPY %vreg10<kill>; R600_Reg128:%vreg48,%vreg10
1136B%vreg49<def> = COPY %vreg11<kill>; R600_Reg32:%vreg49,%vreg11
1152BJUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Debug Variable Analysis ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

0BBB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
16B%vreg17<def> = COPY %T1_W; R600_TReg32:%vreg17
32B%vreg16<def> = COPY %T1_Z; R600_TReg32:%vreg16
48B%vreg15<def> = COPY %T1_Y; R600_TReg32:%vreg15
64B%vreg14<def> = COPY %T1_X; R600_TReg32:%vreg14
80B%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
96B%vreg19:sel_x<def,read-undef> = COPY %vreg14<kill>; R600_Reg128:%vreg19 R600_TReg32:%vreg14
112B%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
128B%vreg21:sel_x<def,read-undef> = COPY %vreg18<kill>; R600_Reg128:%vreg21 R600_Reg32:%vreg18
144B%vreg23<def> = COPY %vreg19<kill>; R600_Reg128:%vreg23,%vreg19
160B%vreg23:sel_y<def> = COPY %vreg15<kill>; R600_Reg128:%vreg23 R600_TReg32:%vreg15
176B%vreg24<def> = COPY %vreg21<kill>; R600_Reg128:%vreg24,%vreg21
192B%vreg24:sel_y<def> = COPY %vreg2; R600_Reg128:%vreg24 R600_Reg32:%vreg2
208B%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
224B%vreg26<def> = COPY %vreg23<kill>; R600_Reg128:%vreg26,%vreg23
240B%vreg26:sel_z<def> = COPY %vreg16<kill>; R600_Reg128:%vreg26 R600_TReg32:%vreg16
256B%vreg27<def> = COPY %vreg24<kill>; R600_Reg128:%vreg27,%vreg24
272B%vreg27:sel_z<def> = COPY %vreg25<kill>; R600_Reg128:%vreg27 R600_Reg32:%vreg25
288B%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
304B%vreg3<def> = COPY %vreg27<kill>; R600_Reg128:%vreg3,%vreg27
320B%vreg3:sel_w<def> = COPY %vreg28<kill>; R600_Reg128:%vreg3 R600_Reg32:%vreg28
336B%vreg1<def> = COPY %vreg26<kill>; R600_Reg128:%vreg1,%vreg26
352B%vreg1:sel_w<def> = COPY %vreg17<kill>; R600_Reg128:%vreg1 R600_TReg32:%vreg17
368B%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
384B%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
400B%vreg47<def> = COPY %vreg2<kill>; R600_Reg32:%vreg47,%vreg2
416B%vreg48<def> = COPY %vreg3<kill>; R600_Reg128:%vreg48,%vreg3
432B%vreg49<def> = COPY %vreg13<kill>; R600_Reg32:%vreg49,%vreg13
    Successors according to CFG: BB#1

448BBB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
464B%vreg5<def> = COPY %vreg47<kill>; R600_Reg32:%vreg5,%vreg47
480B%vreg6<def> = COPY %vreg48<kill>; R600_Reg128:%vreg6,%vreg48
496B%vreg7<def> = COPY %vreg49<kill>; R600_Reg32:%vreg7,%vreg49
512B%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
528B%vreg30<def> = COPY %vreg29<kill>; R600_Reg32:%vreg30,%vreg29
544B%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; R600_Reg32:%vreg30
560BJUMP <BB#3>, pred:%PREDICATE_BIT
576BJUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

592BBB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
608B%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
624B%T2_X<def> = COPY %vreg39<kill>; R600_Reg32:%vreg39
640B%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
656B%T2_Y<def> = COPY %vreg40<kill>; R600_Reg32:%vreg40
672B%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
688B%T2_Z<def> = COPY %vreg41<kill>; R600_Reg32:%vreg41
704B%vreg42<def> = COPY %vreg6:sel_w<kill>; R600_Reg32:%vreg42 R600_Reg128:%vreg6
720B%T2_W<def> = COPY %vreg42<kill>; R600_Reg32:%vreg42
736B%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
752B%T1_X<def> = COPY %vreg43<kill>; R600_Reg32:%vreg43
768B%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
784B%T1_Y<def> = COPY %vreg44<kill>; R600_Reg32:%vreg44
800B%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
816B%T1_Z<def> = COPY %vreg45<kill>; R600_Reg32:%vreg45
832B%vreg46<def> = COPY %vreg1:sel_w<kill>; R600_Reg32:%vreg46 R600_Reg128:%vreg1
848B%T1_W<def> = COPY %vreg46<kill>; R600_Reg32:%vreg46
864BRETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill>

880BBB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
896B%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
912B%vreg32:sel_x<def,read-undef> = COPY %vreg31<kill>; R600_Reg128:%vreg32 R600_Reg32:%vreg31
928B%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
944B%vreg35<def> = COPY %vreg32<kill>; R600_Reg128:%vreg35,%vreg32
960B%vreg35:sel_y<def> = COPY %vreg34<kill>; R600_Reg128:%vreg35 R600_Reg32:%vreg34
976B%vreg36<def> = COPY %vreg35<kill>; R600_Reg128:%vreg36,%vreg35
992B%vreg36:sel_z<def> = COPY %vreg5<kill>; R600_Reg128:%vreg36 R600_Reg32:%vreg5
1008B%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
1024B%vreg10<def> = COPY %vreg36<kill>; R600_Reg128:%vreg10,%vreg36
1040B%vreg10:sel_w<def> = COPY %vreg37<kill>; R600_Reg128:%vreg10 R600_Reg32:%vreg37
1056B%vreg9<def> = COPY %vreg6:sel_z<kill>; R600_Reg32:%vreg9 R600_Reg128:%vreg6
1072B%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
1088B%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
1104B%vreg47<def> = COPY %vreg9<kill>; R600_Reg32:%vreg47,%vreg9
1120B%vreg48<def> = COPY %vreg10<kill>; R600_Reg128:%vreg48,%vreg10
1136B%vreg49<def> = COPY %vreg11<kill>; R600_Reg32:%vreg49,%vreg11
1152BJUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Simple Register Coalescing ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

0BBB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
16B%vreg17<def> = COPY %T1_W; R600_TReg32:%vreg17
32B%vreg16<def> = COPY %T1_Z; R600_TReg32:%vreg16
48B%vreg15<def> = COPY %T1_Y; R600_TReg32:%vreg15
64B%vreg14<def> = COPY %T1_X; R600_TReg32:%vreg14
80B%vreg18<def> = COPY %C1_X; R600_Reg32:%vreg18
96B%vreg19:sel_x<def,read-undef> = COPY %vreg14<kill>; R600_Reg128:%vreg19 R600_TReg32:%vreg14
112B%vreg2<def> = COPY %C1_Y; R600_Reg32:%vreg2
128B%vreg21:sel_x<def,read-undef> = COPY %vreg18<kill>; R600_Reg128:%vreg21 R600_Reg32:%vreg18
144B%vreg23<def> = COPY %vreg19<kill>; R600_Reg128:%vreg23,%vreg19
160B%vreg23:sel_y<def> = COPY %vreg15<kill>; R600_Reg128:%vreg23 R600_TReg32:%vreg15
176B%vreg24<def> = COPY %vreg21<kill>; R600_Reg128:%vreg24,%vreg21
192B%vreg24:sel_y<def> = COPY %vreg2; R600_Reg128:%vreg24 R600_Reg32:%vreg2
208B%vreg25<def> = COPY %C1_Z; R600_Reg32:%vreg25
224B%vreg26<def> = COPY %vreg23<kill>; R600_Reg128:%vreg26,%vreg23
240B%vreg26:sel_z<def> = COPY %vreg16<kill>; R600_Reg128:%vreg26 R600_TReg32:%vreg16
256B%vreg27<def> = COPY %vreg24<kill>; R600_Reg128:%vreg27,%vreg24
272B%vreg27:sel_z<def> = COPY %vreg25<kill>; R600_Reg128:%vreg27 R600_Reg32:%vreg25
288B%vreg28<def> = COPY %C1_W; R600_Reg32:%vreg28
304B%vreg3<def> = COPY %vreg27<kill>; R600_Reg128:%vreg3,%vreg27
320B%vreg3:sel_w<def> = COPY %vreg28<kill>; R600_Reg128:%vreg3 R600_Reg32:%vreg28
336B%vreg1<def> = COPY %vreg26<kill>; R600_Reg128:%vreg1,%vreg26
352B%vreg1:sel_w<def> = COPY %vreg17<kill>; R600_Reg128:%vreg1 R600_TReg32:%vreg17
368B%vreg13<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg13
384B%vreg0<def> = COPY %C0_X; R600_Reg32:%vreg0
400B%vreg47<def> = COPY %vreg2<kill>; R600_Reg32:%vreg47,%vreg2
416B%vreg48<def> = COPY %vreg3<kill>; R600_Reg128:%vreg48,%vreg3
432B%vreg49<def> = COPY %vreg13<kill>; R600_Reg32:%vreg49,%vreg13
    Successors according to CFG: BB#1

448BBB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
464B%vreg5<def> = COPY %vreg47<kill>; R600_Reg32:%vreg5,%vreg47
480B%vreg6<def> = COPY %vreg48<kill>; R600_Reg128:%vreg6,%vreg48
496B%vreg7<def> = COPY %vreg49<kill>; R600_Reg32:%vreg7,%vreg49
512B%vreg29<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, %vreg7, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg29,%vreg0,%vreg7
528B%vreg30<def> = COPY %vreg29<kill>; R600_Reg32:%vreg30,%vreg29
544B%PREDICATE_BIT<def> = PRED_X %vreg30<kill>, 152, 16; R600_Reg32:%vreg30
560BJUMP <BB#3>, pred:%PREDICATE_BIT
576BJUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

592BBB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
608B%vreg39<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg39 R600_Reg128:%vreg6
624B%T2_X<def> = COPY %vreg39<kill>; R600_Reg32:%vreg39
640B%vreg40<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg40 R600_Reg128:%vreg6
656B%T2_Y<def> = COPY %vreg40<kill>; R600_Reg32:%vreg40
672B%vreg41<def> = COPY %vreg6:sel_z; R600_Reg32:%vreg41 R600_Reg128:%vreg6
688B%T2_Z<def> = COPY %vreg41<kill>; R600_Reg32:%vreg41
704B%vreg42<def> = COPY %vreg6:sel_w<kill>; R600_Reg32:%vreg42 R600_Reg128:%vreg6
720B%T2_W<def> = COPY %vreg42<kill>; R600_Reg32:%vreg42
736B%vreg43<def> = COPY %vreg1:sel_x; R600_Reg32:%vreg43 R600_Reg128:%vreg1
752B%T1_X<def> = COPY %vreg43<kill>; R600_Reg32:%vreg43
768B%vreg44<def> = COPY %vreg1:sel_y; R600_Reg32:%vreg44 R600_Reg128:%vreg1
784B%T1_Y<def> = COPY %vreg44<kill>; R600_Reg32:%vreg44
800B%vreg45<def> = COPY %vreg1:sel_z; R600_Reg32:%vreg45 R600_Reg128:%vreg1
816B%T1_Z<def> = COPY %vreg45<kill>; R600_Reg32:%vreg45
832B%vreg46<def> = COPY %vreg1:sel_w<kill>; R600_Reg32:%vreg46 R600_Reg128:%vreg1
848B%T1_W<def> = COPY %vreg46<kill>; R600_Reg32:%vreg46
864BRETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill>

880BBB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
896B%vreg31<def> = COPY %vreg6:sel_x; R600_Reg32:%vreg31 R600_Reg128:%vreg6
912B%vreg32:sel_x<def,read-undef> = COPY %vreg31<kill>; R600_Reg128:%vreg32 R600_Reg32:%vreg31
928B%vreg34<def> = COPY %vreg6:sel_y; R600_Reg32:%vreg34 R600_Reg128:%vreg6
944B%vreg35<def> = COPY %vreg32<kill>; R600_Reg128:%vreg35,%vreg32
960B%vreg35:sel_y<def> = COPY %vreg34<kill>; R600_Reg128:%vreg35 R600_Reg32:%vreg34
976B%vreg36<def> = COPY %vreg35<kill>; R600_Reg128:%vreg36,%vreg35
992B%vreg36:sel_z<def> = COPY %vreg5<kill>; R600_Reg128:%vreg36 R600_Reg32:%vreg5
1008B%vreg37<def> = COPY %vreg6:sel_w; R600_Reg32:%vreg37 R600_Reg128:%vreg6
1024B%vreg10<def> = COPY %vreg36<kill>; R600_Reg128:%vreg10,%vreg36
1040B%vreg10:sel_w<def> = COPY %vreg37<kill>; R600_Reg128:%vreg10 R600_Reg32:%vreg37
1056B%vreg9<def> = COPY %vreg6:sel_z<kill>; R600_Reg32:%vreg9 R600_Reg128:%vreg6
1072B%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
1088B%vreg11<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg11,%vreg7,%vreg38
1104B%vreg47<def> = COPY %vreg9<kill>; R600_Reg32:%vreg47,%vreg9
1120B%vreg48<def> = COPY %vreg10<kill>; R600_Reg128:%vreg48,%vreg10
1136B%vreg49<def> = COPY %vreg11<kill>; R600_Reg32:%vreg49,%vreg11
1152BJUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Live Stack Slot Analysis ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

0BBB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
16B%vreg1:sel_w<def,read-undef> = COPY %T1_W; R600_Reg128:%vreg1
32B%vreg1:sel_z<def> = COPY %T1_Z; R600_Reg128:%vreg1
48B%vreg1:sel_y<def> = COPY %T1_Y; R600_Reg128:%vreg1
64B%vreg1:sel_x<def> = COPY %T1_X; R600_Reg128:%vreg1
128B%vreg27:sel_x<def,read-undef> = COPY %C1_X; R600_Reg128:%vreg27
192B%vreg27:sel_y<def> = COPY %C1_Y; R600_Reg128:%vreg27
272B%vreg27:sel_z<def> = COPY %C1_Z; R600_Reg128:%vreg27
320B%vreg27:sel_w<def> = COPY %C1_W; R600_Reg128:%vreg27
368B%vreg49<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg49
    Successors according to CFG: BB#1

448BBB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
512B%vreg30<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %C0_X, 0, 0, 0, %vreg49, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg30,%vreg49
544B%PREDICATE_BIT<def> = PRED_X %vreg30, 152, 16; R600_Reg32:%vreg30
560BJUMP <BB#3>, pred:%PREDICATE_BIT
576BJUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

592BBB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
624B%T2_X<def> = COPY %vreg27:sel_x; R600_Reg128:%vreg27
656B%T2_Y<def> = COPY %vreg27:sel_y; R600_Reg128:%vreg27
688B%T2_Z<def> = COPY %vreg27:sel_z; R600_Reg128:%vreg27
720B%T2_W<def> = COPY %vreg27:sel_w; R600_Reg128:%vreg27
752B%T1_X<def> = COPY %vreg1:sel_x; R600_Reg128:%vreg1
784B%T1_Y<def> = COPY %vreg1:sel_y; R600_Reg128:%vreg1
816B%T1_Z<def> = COPY %vreg1:sel_z; R600_Reg128:%vreg1
848B%T1_W<def> = COPY %vreg1:sel_w; R600_Reg128:%vreg1
864BRETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill>

880BBB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
1072B%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
1088B%vreg49<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg49, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg49,%vreg38
1152BJUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Calculate spill weights ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

0BBB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
16B%vreg1:sel_w<def,read-undef> = COPY %T1_W; R600_Reg128:%vreg1
32B%vreg1:sel_z<def> = COPY %T1_Z; R600_Reg128:%vreg1
48B%vreg1:sel_y<def> = COPY %T1_Y; R600_Reg128:%vreg1
64B%vreg1:sel_x<def> = COPY %T1_X; R600_Reg128:%vreg1
128B%vreg27:sel_x<def,read-undef> = COPY %C1_X; R600_Reg128:%vreg27
192B%vreg27:sel_y<def> = COPY %C1_Y; R600_Reg128:%vreg27
272B%vreg27:sel_z<def> = COPY %C1_Z; R600_Reg128:%vreg27
320B%vreg27:sel_w<def> = COPY %C1_W; R600_Reg128:%vreg27
368B%vreg49<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg49
    Successors according to CFG: BB#1

448BBB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
512B%vreg30<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %C0_X, 0, 0, 0, %vreg49, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg30,%vreg49
544B%PREDICATE_BIT<def> = PRED_X %vreg30, 152, 16; R600_Reg32:%vreg30
560BJUMP <BB#3>, pred:%PREDICATE_BIT
576BJUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

592BBB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
624B%T2_X<def> = COPY %vreg27:sel_x; R600_Reg128:%vreg27
656B%T2_Y<def> = COPY %vreg27:sel_y; R600_Reg128:%vreg27
688B%T2_Z<def> = COPY %vreg27:sel_z; R600_Reg128:%vreg27
720B%T2_W<def> = COPY %vreg27:sel_w; R600_Reg128:%vreg27
752B%T1_X<def> = COPY %vreg1:sel_x; R600_Reg128:%vreg1
784B%T1_Y<def> = COPY %vreg1:sel_y; R600_Reg128:%vreg1
816B%T1_Z<def> = COPY %vreg1:sel_z; R600_Reg128:%vreg1
848B%T1_W<def> = COPY %vreg1:sel_w; R600_Reg128:%vreg1
864BRETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill>

880BBB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
1072B%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
1088B%vreg49<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg49, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg49,%vreg38
1152BJUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Virtual Register Map ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

0BBB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
16B%vreg1:sel_w<def,read-undef> = COPY %T1_W; R600_Reg128:%vreg1
32B%vreg1:sel_z<def> = COPY %T1_Z; R600_Reg128:%vreg1
48B%vreg1:sel_y<def> = COPY %T1_Y; R600_Reg128:%vreg1
64B%vreg1:sel_x<def> = COPY %T1_X; R600_Reg128:%vreg1
128B%vreg27:sel_x<def,read-undef> = COPY %C1_X; R600_Reg128:%vreg27
192B%vreg27:sel_y<def> = COPY %C1_Y; R600_Reg128:%vreg27
272B%vreg27:sel_z<def> = COPY %C1_Z; R600_Reg128:%vreg27
320B%vreg27:sel_w<def> = COPY %C1_W; R600_Reg128:%vreg27
368B%vreg49<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg49
    Successors according to CFG: BB#1

448BBB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
512B%vreg30<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %C0_X, 0, 0, 0, %vreg49, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg30,%vreg49
544B%PREDICATE_BIT<def> = PRED_X %vreg30, 152, 16; R600_Reg32:%vreg30
560BJUMP <BB#3>, pred:%PREDICATE_BIT
576BJUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

592BBB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
624B%T2_X<def> = COPY %vreg27:sel_x; R600_Reg128:%vreg27
656B%T2_Y<def> = COPY %vreg27:sel_y; R600_Reg128:%vreg27
688B%T2_Z<def> = COPY %vreg27:sel_z; R600_Reg128:%vreg27
720B%T2_W<def> = COPY %vreg27:sel_w; R600_Reg128:%vreg27
752B%T1_X<def> = COPY %vreg1:sel_x; R600_Reg128:%vreg1
784B%T1_Y<def> = COPY %vreg1:sel_y; R600_Reg128:%vreg1
816B%T1_Z<def> = COPY %vreg1:sel_z; R600_Reg128:%vreg1
848B%T1_W<def> = COPY %vreg1:sel_w; R600_Reg128:%vreg1
864BRETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill>

880BBB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
1072B%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
1088B%vreg49<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg49, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg49,%vreg38
1152BJUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Live Register Matrix ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

0BBB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
16B%vreg1:sel_w<def,read-undef> = COPY %T1_W; R600_Reg128:%vreg1
32B%vreg1:sel_z<def> = COPY %T1_Z; R600_Reg128:%vreg1
48B%vreg1:sel_y<def> = COPY %T1_Y; R600_Reg128:%vreg1
64B%vreg1:sel_x<def> = COPY %T1_X; R600_Reg128:%vreg1
128B%vreg27:sel_x<def,read-undef> = COPY %C1_X; R600_Reg128:%vreg27
192B%vreg27:sel_y<def> = COPY %C1_Y; R600_Reg128:%vreg27
272B%vreg27:sel_z<def> = COPY %C1_Z; R600_Reg128:%vreg27
320B%vreg27:sel_w<def> = COPY %C1_W; R600_Reg128:%vreg27
368B%vreg49<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg49
    Successors according to CFG: BB#1

448BBB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
512B%vreg30<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %C0_X, 0, 0, 0, %vreg49, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg30,%vreg49
544B%PREDICATE_BIT<def> = PRED_X %vreg30, 152, 16; R600_Reg32:%vreg30
560BJUMP <BB#3>, pred:%PREDICATE_BIT
576BJUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

592BBB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
624B%T2_X<def> = COPY %vreg27:sel_x; R600_Reg128:%vreg27
656B%T2_Y<def> = COPY %vreg27:sel_y; R600_Reg128:%vreg27
688B%T2_Z<def> = COPY %vreg27:sel_z; R600_Reg128:%vreg27
720B%T2_W<def> = COPY %vreg27:sel_w; R600_Reg128:%vreg27
752B%T1_X<def> = COPY %vreg1:sel_x; R600_Reg128:%vreg1
784B%T1_Y<def> = COPY %vreg1:sel_y; R600_Reg128:%vreg1
816B%T1_Z<def> = COPY %vreg1:sel_z; R600_Reg128:%vreg1
848B%T1_W<def> = COPY %vreg1:sel_w; R600_Reg128:%vreg1
864BRETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill>

880BBB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
1072B%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
1088B%vreg49<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg49, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg49,%vreg38
1152BJUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Virtual Register Rewriter ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

0BBB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
16B%vreg1:sel_w<def,read-undef> = COPY %T1_W; R600_Reg128:%vreg1
32B%vreg1:sel_z<def> = COPY %T1_Z; R600_Reg128:%vreg1
48B%vreg1:sel_y<def> = COPY %T1_Y; R600_Reg128:%vreg1
64B%vreg1:sel_x<def> = COPY %T1_X; R600_Reg128:%vreg1
128B%vreg27:sel_x<def,read-undef> = COPY %C1_X; R600_Reg128:%vreg27
192B%vreg27:sel_y<def> = COPY %C1_Y; R600_Reg128:%vreg27
272B%vreg27:sel_z<def> = COPY %C1_Z; R600_Reg128:%vreg27
320B%vreg27:sel_w<def> = COPY %C1_W; R600_Reg128:%vreg27
368B%vreg49<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg49
    Successors according to CFG: BB#1

448BBB#1: derived from LLVM BB %25
    Predecessors according to CFG: BB#0 BB#3
512B%vreg30<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %C0_X, 0, 0, 0, %vreg49, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg30,%vreg49
544B%PREDICATE_BIT<def> = PRED_X %vreg30, 152, 16; R600_Reg32:%vreg30
560BJUMP <BB#3>, pred:%PREDICATE_BIT
576BJUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

592BBB#2: derived from LLVM BB %31
    Predecessors according to CFG: BB#1
624B%T2_X<def> = COPY %vreg27:sel_x; R600_Reg128:%vreg27
656B%T2_Y<def> = COPY %vreg27:sel_y; R600_Reg128:%vreg27
688B%T2_Z<def> = COPY %vreg27:sel_z; R600_Reg128:%vreg27
720B%T2_W<def> = COPY %vreg27:sel_w; R600_Reg128:%vreg27
752B%T1_X<def> = COPY %vreg1:sel_x; R600_Reg128:%vreg1
784B%T1_Y<def> = COPY %vreg1:sel_y; R600_Reg128:%vreg1
816B%T1_Z<def> = COPY %vreg1:sel_z; R600_Reg128:%vreg1
848B%T1_W<def> = COPY %vreg1:sel_w; R600_Reg128:%vreg1
864BRETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use>, %T2_Z<imp-use>, %T2_Y<imp-use>, %T2_X<imp-use>

880BBB#3: derived from LLVM BB %41
    Predecessors according to CFG: BB#1
1072B%vreg38<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1; R600_Reg32:%vreg38
1088B%vreg49<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg49, 0, 0, 0, %vreg38<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg49,%vreg38
1152BJUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Stack Slot Coloring ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

0BBB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
16B%T1_W<def> = KILL %T1_W, %T1_XYZW<imp-def>
32B%T1_Z<def> = KILL %T1_Z, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
48B%T1_Y<def> = KILL %T1_Y, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
64B%T1_X<def> = KILL %T1_X, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
128B%T2_X<def> = COPY %C1_X, %T2_XYZW<imp-def>
192B%T2_Y<def> = COPY %C1_Y, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
272B%T2_Z<def> = COPY %C1_Z, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
320B%T2_W<def> = COPY %C1_W, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
368B%T3_X<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
    Successors according to CFG: BB#1

448BBB#1: derived from LLVM BB %25
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#0 BB#3
512B%T3_Y<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %C0_X, 0, 0, 0, %T3_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
544B%PREDICATE_BIT<def> = PRED_X %T3_Y<kill>, 152, 16
560BJUMP <BB#3>, pred:%PREDICATE_BIT
576BJUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

592BBB#2: derived from LLVM BB %31
    Live Ins: %T1_XYZW %T2_XYZW
    Predecessors according to CFG: BB#1
864BRETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use>, %T2_Z<imp-use>, %T2_Y<imp-use>, %T2_X<imp-use>

880BBB#3: derived from LLVM BB %41
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#1
1072B%T3_Y<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1
1088B%T3_X<def> = ADD_INT 0, 0, 1, 0, 0, 0, %T3_X<kill>, 0, 0, 0, %T3_Y<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
1152BJUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Machine Loop Invariant Code Motion ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%T1_W<def> = KILL %T1_W, %T1_XYZW<imp-def>
%T1_Z<def> = KILL %T1_Z, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_Y<def> = KILL %T1_Y, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_X<def> = KILL %T1_X, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T2_X<def> = COPY %C1_X, %T2_XYZW<imp-def>
%T2_Y<def> = COPY %C1_Y, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
%T2_Z<def> = COPY %C1_Z, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
%T2_W<def> = COPY %C1_W, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
%T3_X<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#0 BB#3
%T3_Y<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %C0_X, 0, 0, 0, %T3_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
%PREDICATE_BIT<def> = PRED_X %T3_Y<kill>, 152, 16
JUMP <BB#3>, pred:%PREDICATE_BIT
JUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Live Ins: %T1_XYZW %T2_XYZW
    Predecessors according to CFG: BB#1
RETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use>, %T2_Z<imp-use>, %T2_Y<imp-use>, %T2_X<imp-use>

BB#3: derived from LLVM BB %41
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#1
%T3_Y<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1
%T3_X<def> = ADD_INT 0, 0, 1, 0, 0, 0, %T3_X<kill>, 0, 0, 0, %T3_Y<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Prologue/Epilogue Insertion & Frame Finalization ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%T1_W<def> = KILL %T1_W, %T1_XYZW<imp-def>
%T1_Z<def> = KILL %T1_Z, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_Y<def> = KILL %T1_Y, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_X<def> = KILL %T1_X, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T2_X<def> = COPY %C1_X, %T2_XYZW<imp-def>
%T2_Y<def> = COPY %C1_Y, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
%T2_Z<def> = COPY %C1_Z, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
%T2_W<def> = COPY %C1_W, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
%T3_X<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#0 BB#3
%T3_Y<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %C0_X, 0, 0, 0, %T3_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
%PREDICATE_BIT<def> = PRED_X %T3_Y<kill>, 152, 16
JUMP <BB#3>, pred:%PREDICATE_BIT
JUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Live Ins: %T1_XYZW %T2_XYZW
    Predecessors according to CFG: BB#1
RETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use>, %T2_Z<imp-use>, %T2_Y<imp-use>, %T2_X<imp-use>

BB#3: derived from LLVM BB %41
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#1
%T3_Y<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1
%T3_X<def> = ADD_INT 0, 0, 1, 0, 0, 0, %T3_X<kill>, 0, 0, 0, %T3_Y<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Control Flow Optimizer ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%T1_W<def> = KILL %T1_W, %T1_XYZW<imp-def>
%T1_Z<def> = KILL %T1_Z, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_Y<def> = KILL %T1_Y, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_X<def> = KILL %T1_X, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T2_X<def> = COPY %C1_X, %T2_XYZW<imp-def>
%T2_Y<def> = COPY %C1_Y, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
%T2_Z<def> = COPY %C1_Z, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
%T2_W<def> = COPY %C1_W, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
%T3_X<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#0 BB#3
%T3_Y<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %C0_X, 0, 0, 0, %T3_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
%PREDICATE_BIT<def> = PRED_X %T3_Y<kill>, 152, 16
JUMP <BB#3>, pred:%PREDICATE_BIT
JUMP <BB#2>, pred:%noreg
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Live Ins: %T1_XYZW %T2_XYZW
    Predecessors according to CFG: BB#1
RETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use>, %T2_Z<imp-use>, %T2_Y<imp-use>, %T2_X<imp-use>

BB#3: derived from LLVM BB %41
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#1
%T3_Y<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1
%T3_X<def> = ADD_INT 0, 0, 1, 0, 0, 0, %T3_X<kill>, 0, 0, 0, %T3_Y<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Tail Duplication ***:
# Machine code for function main: Post SSA, not tracking liveness
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%T1_W<def> = KILL %T1_W, %T1_XYZW<imp-def>
%T1_Z<def> = KILL %T1_Z, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_Y<def> = KILL %T1_Y, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_X<def> = KILL %T1_X, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T2_X<def> = COPY %C1_X, %T2_XYZW<imp-def>
%T2_Y<def> = COPY %C1_Y, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
%T2_Z<def> = COPY %C1_Z, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
%T2_W<def> = COPY %C1_W, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
%T3_X<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#0 BB#3
%T3_Y<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %C0_X, 0, 0, 0, %T3_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
%PREDICATE_BIT<def> = PRED_X %T3_Y<kill>, 152, 16
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Live Ins: %T1_XYZW %T2_XYZW
    Predecessors according to CFG: BB#1
RETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use>, %T2_Z<imp-use>, %T2_Y<imp-use>, %T2_X<imp-use>

BB#3: derived from LLVM BB %41
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#1
%T3_Y<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1
%T3_X<def> = ADD_INT 0, 0, 1, 0, 0, 0, %T3_X<kill>, 0, 0, 0, %T3_Y<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Machine Copy Propagation Pass ***:
# Machine code for function main: Post SSA, not tracking liveness
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%T1_W<def> = KILL %T1_W, %T1_XYZW<imp-def>
%T1_Z<def> = KILL %T1_Z, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_Y<def> = KILL %T1_Y, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_X<def> = KILL %T1_X, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T2_X<def> = COPY %C1_X, %T2_XYZW<imp-def>
%T2_Y<def> = COPY %C1_Y, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
%T2_Z<def> = COPY %C1_Z, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
%T2_W<def> = COPY %C1_W, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
%T3_X<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#0 BB#3
%T3_Y<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %C0_X, 0, 0, 0, %T3_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
%PREDICATE_BIT<def> = PRED_X %T3_Y<kill>, 152, 16
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Live Ins: %T1_XYZW %T2_XYZW
    Predecessors according to CFG: BB#1
RETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use>, %T2_Z<imp-use>, %T2_Y<imp-use>, %T2_X<imp-use>

BB#3: derived from LLVM BB %41
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#1
%T3_Y<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1
%T3_X<def> = ADD_INT 0, 0, 1, 0, 0, 0, %T3_X<kill>, 0, 0, 0, %T3_Y<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Post-RA pseudo instruction expansion pass ***:
# Machine code for function main: Post SSA, not tracking liveness
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%T1_W<def> = KILL %T1_W, %T1_XYZW<imp-def>
%T1_Z<def> = KILL %T1_Z, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_Y<def> = KILL %T1_Y, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_X<def> = KILL %T1_X, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T2_X<def> = COPY %C1_X, %T2_XYZW<imp-def>
%T2_Y<def> = COPY %C1_Y, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
%T2_Z<def> = COPY %C1_Z, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
%T2_W<def> = COPY %C1_W, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
%T3_X<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#0 BB#3
%T3_Y<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %C0_X, 0, 0, 0, %T3_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
%PREDICATE_BIT<def> = PRED_X %T3_Y<kill>, 152, 16
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Live Ins: %T1_XYZW %T2_XYZW
    Predecessors according to CFG: BB#1
RETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use>, %T2_Z<imp-use>, %T2_Y<imp-use>, %T2_X<imp-use>

BB#3: derived from LLVM BB %41
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#1
%T3_Y<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1
%T3_X<def> = ADD_INT 0, 0, 1, 0, 0, 0, %T3_X<kill>, 0, 0, 0, %T3_Y<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before If Converter ***:
# Machine code for function main: Post SSA, not tracking liveness
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%T1_W<def> = KILL %T1_W, %T1_XYZW<imp-def>
%T1_Z<def> = KILL %T1_Z, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_Y<def> = KILL %T1_Y, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_X<def> = KILL %T1_X, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T2_X<def> = MOV 1, 0, 0, 0, %C1_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T2_Y<def> = MOV 1, 0, 0, 0, %C1_Y, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T2_Z<def> = MOV 1, 0, 0, 0, %C1_Z, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T2_W<def> = MOV 1, 0, 0, 0, %C1_W, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T3_X<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#0 BB#3
%T3_Y<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %C0_X, 0, 0, 0, %T3_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
%PREDICATE_BIT<def> = PRED_X %T3_Y<kill>, 152, 16
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Live Ins: %T1_XYZW %T2_XYZW
    Predecessors according to CFG: BB#1
RETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use>, %T2_Z<imp-use>, %T2_Y<imp-use>, %T2_X<imp-use>

BB#3: derived from LLVM BB %41
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#1
%T3_Y<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1
%T3_X<def> = ADD_INT 0, 0, 1, 0, 0, 0, %T3_X<kill>, 0, 0, 0, %T3_Y<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Post RA top-down list latency scheduler ***:
# Machine code for function main: Post SSA, not tracking liveness
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%T1_W<def> = KILL %T1_W, %T1_XYZW<imp-def>
%T1_Z<def> = KILL %T1_Z, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_Y<def> = KILL %T1_Y, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_X<def> = KILL %T1_X, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T2_X<def> = MOV 1, 0, 0, 0, %C1_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T2_Y<def> = MOV 1, 0, 0, 0, %C1_Y, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T2_Z<def> = MOV 1, 0, 0, 0, %C1_Z, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T2_W<def> = MOV 1, 0, 0, 0, %C1_W, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T3_X<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#0 BB#3
%T3_Y<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %C0_X, 0, 0, 0, %T3_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
%PREDICATE_BIT<def> = PRED_X %T3_Y<kill>, 152, 16
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Live Ins: %T1_XYZW %T2_XYZW
    Predecessors according to CFG: BB#1
RETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use>, %T2_Z<imp-use>, %T2_Y<imp-use>, %T2_X<imp-use>

BB#3: derived from LLVM BB %41
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#1
%T3_Y<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1
%T3_X<def> = ADD_INT 0, 0, 1, 0, 0, 0, %T3_X<kill>, 0, 0, 0, %T3_Y<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Analyze Machine Code For Garbage Collection ***:
# Machine code for function main: Post SSA, not tracking liveness
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%T1_W<def> = KILL %T1_W, %T1_XYZW<imp-def>
%T1_Z<def> = KILL %T1_Z, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_Y<def> = KILL %T1_Y, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_X<def> = KILL %T1_X, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T2_X<def> = MOV 1, 0, 0, 0, %C1_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T2_Y<def> = MOV 1, 0, 0, 0, %C1_Y, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T2_Z<def> = MOV 1, 0, 0, 0, %C1_Z, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T2_W<def> = MOV 1, 0, 0, 0, %C1_W, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T3_X<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#0 BB#3
%T3_Y<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %C0_X, 0, 0, 0, %T3_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
%PREDICATE_BIT<def> = PRED_X %T3_Y<kill>, 152, 16
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Live Ins: %T1_XYZW %T2_XYZW
    Predecessors according to CFG: BB#1
RETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use>, %T2_Z<imp-use>, %T2_Y<imp-use>, %T2_X<imp-use>

BB#3: derived from LLVM BB %41
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#1
%T3_Y<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1
%T3_X<def> = ADD_INT 0, 0, 1, 0, 0, 0, %T3_X<kill>, 0, 0, 0, %T3_Y<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Branch Probability Basic Block Placement ***:
# Machine code for function main: Post SSA, not tracking liveness
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%T1_W<def> = KILL %T1_W, %T1_XYZW<imp-def>
%T1_Z<def> = KILL %T1_Z, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_Y<def> = KILL %T1_Y, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_X<def> = KILL %T1_X, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T2_X<def> = MOV 1, 0, 0, 0, %C1_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T2_Y<def> = MOV 1, 0, 0, 0, %C1_Y, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T2_Z<def> = MOV 1, 0, 0, 0, %C1_Z, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T2_W<def> = MOV 1, 0, 0, 0, %C1_W, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T3_X<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %25
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#0 BB#3
%T3_Y<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %C0_X, 0, 0, 0, %T3_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
%PREDICATE_BIT<def> = PRED_X %T3_Y<kill>, 152, 16
JUMP <BB#3>, pred:%PREDICATE_BIT<kill>
    Successors according to CFG: BB#2(4) BB#3(124)

BB#2: derived from LLVM BB %31
    Live Ins: %T1_XYZW %T2_XYZW
    Predecessors according to CFG: BB#1
RETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use>, %T2_Z<imp-use>, %T2_Y<imp-use>, %T2_X<imp-use>

BB#3: derived from LLVM BB %41
    Live Ins: %T1_XYZW %T2_XYZW %T3_X
    Predecessors according to CFG: BB#1
%T3_Y<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1
%T3_X<def> = ADD_INT 0, 0, 1, 0, 0, 0, %T3_X<kill>, 0, 0, 0, %T3_Y<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
JUMP <BB#1>, pred:%noreg
    Successors according to CFG: BB#1

# End machine code for function main.

# *** IR Dump Before Finalize machine instruction bundles ***:
# Machine code for function main: Post SSA, not tracking liveness
Function Live Ins: %T1_X in %vreg14, %T1_Y in %vreg15, %T1_Z in %vreg16, %T1_W in %vreg17
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_X %T1_Y %T1_Z %T1_W
%T1_W<def> = KILL %T1_W, %T1_XYZW<imp-def>
%T1_Z<def> = KILL %T1_Z, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_Y<def> = KILL %T1_Y, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T1_X<def> = KILL %T1_X, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
%T2_X<def> = MOV 1, 0, 0, 0, %C1_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T2_Y<def> = MOV 1, 0, 0, 0, %C1_Y, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T2_Z<def> = MOV 1, 0, 0, 0, %C1_Z, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T2_W<def> = MOV 1, 0, 0, 0, %C1_W, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW<imp-def>
%T3_X<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
WHILELOOP
%T3_Y<def> = SETGT_INT 0, 0, 1, 0, 0, 0, %C0_X, 0, 0, 0, %T3_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
%PREDICATE_BIT<def> = PRED_SETE_INT 1, 0, 0, 0, 0, 0, %T3_Y, 0, 0, 0, %ZERO, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
BREAK_LOGICALZ_i32 %PREDICATE_BIT
%T3_Y<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 1
%T3_X<def> = ADD_INT 0, 0, 1, 0, 0, 0, %T3_X<kill>, 0, 0, 0, %T3_Y<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0
ENDLOOP
RETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use>, %T2_Z<imp-use>, %T2_Y<imp-use>, %T2_X<imp-use>

# End machine code for function main.





More information about the llvm-dev mailing list