[LLVMdev] RegisterCoalescing pass crashes with ImplicitDef registers

Vincent Lejeune vljn at ovi.com
Sat Oct 20 13:23:46 PDT 2012


Hi,

Below is the output of the command "llc -march=r600 -mcpu=cayman -print-before-all -debug-only=regalloc file.shader", run with llvm3.2svn.
The register coalescing pass crashes when joining vreg12:sel_z with vreg13, because it tries to access the live interval of vreg13, which is undefined (vreg13 is only defined by an IMPLICIT_DEF).

I don't know whether this is a bug in the pass, or whether my backend should do something specific before the pass runs.
This worked with llvm 3.1; I don't know whether a new requirement related to register coalescing was introduced between 3.1 and current trunk.

Regards,
Vincent

*** IR Dump Before Preliminary module verification ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.R600.load.input(i32 4)
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = call float @llvm.R600.load.input(i32 5)
  %4 = insertelement <4 x float> %2, float %3, i32 1
  %5 = call float @llvm.R600.load.input(i32 6)
  %6 = insertelement <4 x float> %4, float %5, i32 2
  %7 = call float @llvm.R600.load.input(i32 7)
  %8 = insertelement <4 x float> %6, float %7, i32 3
  %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1>
  %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  %13 = fsub <2 x float> zeroinitializer, %10
  %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %17 = extractelement <4 x float> %16, i32 0
  call void @llvm.AMDGPU.store.output(float %17, i32 8)
  %18 = extractelement <4 x float> %16, i32 1
  call void @llvm.AMDGPU.store.output(float %18, i32 9)
  %19 = extractelement <4 x float> %16, i32 2
  call void @llvm.AMDGPU.store.output(float %19, i32 10)
  %20 = extractelement <4 x float> %16, i32 3
  call void @llvm.AMDGPU.store.output(float %20, i32 11)
  %21 = extractelement <4 x float> %9, i32 0
  call void @llvm.AMDGPU.store.output(float %21, i32 4)
  %22 = extractelement <4 x float> %9, i32 1
  call void @llvm.AMDGPU.store.output(float %22, i32 5)
  %23 = extractelement <4 x float> %9, i32 2
  call void @llvm.AMDGPU.store.output(float %23, i32 6)
  %24 = extractelement <4 x float> %9, i32 3
  call void @llvm.AMDGPU.store.output(float %24, i32 7)
  ret void
}
*** IR Dump Before Module Verifier ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.R600.load.input(i32 4)
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = call float @llvm.R600.load.input(i32 5)
  %4 = insertelement <4 x float> %2, float %3, i32 1
  %5 = call float @llvm.R600.load.input(i32 6)
  %6 = insertelement <4 x float> %4, float %5, i32 2
  %7 = call float @llvm.R600.load.input(i32 7)
  %8 = insertelement <4 x float> %6, float %7, i32 3
  %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1>
  %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  %13 = fsub <2 x float> zeroinitializer, %10
  %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %17 = extractelement <4 x float> %16, i32 0
  call void @llvm.AMDGPU.store.output(float %17, i32 8)
  %18 = extractelement <4 x float> %16, i32 1
  call void @llvm.AMDGPU.store.output(float %18, i32 9)
  %19 = extractelement <4 x float> %16, i32 2
  call void @llvm.AMDGPU.store.output(float %19, i32 10)
  %20 = extractelement <4 x float> %16, i32 3
  call void @llvm.AMDGPU.store.output(float %20, i32 11)
  %21 = extractelement <4 x float> %9, i32 0
  call void @llvm.AMDGPU.store.output(float %21, i32 4)
  %22 = extractelement <4 x float> %9, i32 1
  call void @llvm.AMDGPU.store.output(float %22, i32 5)
  %23 = extractelement <4 x float> %9, i32 2
  call void @llvm.AMDGPU.store.output(float %23, i32 6)
  %24 = extractelement <4 x float> %9, i32 3
  call void @llvm.AMDGPU.store.output(float %24, i32 7)
  ret void
}
*** IR Dump Before Lower Garbage Collection Instructions ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.R600.load.input(i32 4)
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = call float @llvm.R600.load.input(i32 5)
  %4 = insertelement <4 x float> %2, float %3, i32 1
  %5 = call float @llvm.R600.load.input(i32 6)
  %6 = insertelement <4 x float> %4, float %5, i32 2
  %7 = call float @llvm.R600.load.input(i32 7)
  %8 = insertelement <4 x float> %6, float %7, i32 3
  %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1>
  %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  %13 = fsub <2 x float> zeroinitializer, %10
  %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %17 = extractelement <4 x float> %16, i32 0
  call void @llvm.AMDGPU.store.output(float %17, i32 8)
  %18 = extractelement <4 x float> %16, i32 1
  call void @llvm.AMDGPU.store.output(float %18, i32 9)
  %19 = extractelement <4 x float> %16, i32 2
  call void @llvm.AMDGPU.store.output(float %19, i32 10)
  %20 = extractelement <4 x float> %16, i32 3
  call void @llvm.AMDGPU.store.output(float %20, i32 11)
  %21 = extractelement <4 x float> %9, i32 0
  call void @llvm.AMDGPU.store.output(float %21, i32 4)
  %22 = extractelement <4 x float> %9, i32 1
  call void @llvm.AMDGPU.store.output(float %22, i32 5)
  %23 = extractelement <4 x float> %9, i32 2
  call void @llvm.AMDGPU.store.output(float %23, i32 6)
  %24 = extractelement <4 x float> %9, i32 3
  call void @llvm.AMDGPU.store.output(float %24, i32 7)
  ret void
}
*** IR Dump Before Remove unreachable blocks from the CFG ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.R600.load.input(i32 4)
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = call float @llvm.R600.load.input(i32 5)
  %4 = insertelement <4 x float> %2, float %3, i32 1
  %5 = call float @llvm.R600.load.input(i32 6)
  %6 = insertelement <4 x float> %4, float %5, i32 2
  %7 = call float @llvm.R600.load.input(i32 7)
  %8 = insertelement <4 x float> %6, float %7, i32 3
  %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1>
  %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  %13 = fsub <2 x float> zeroinitializer, %10
  %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %17 = extractelement <4 x float> %16, i32 0
  call void @llvm.AMDGPU.store.output(float %17, i32 8)
  %18 = extractelement <4 x float> %16, i32 1
  call void @llvm.AMDGPU.store.output(float %18, i32 9)
  %19 = extractelement <4 x float> %16, i32 2
  call void @llvm.AMDGPU.store.output(float %19, i32 10)
  %20 = extractelement <4 x float> %16, i32 3
  call void @llvm.AMDGPU.store.output(float %20, i32 11)
  %21 = extractelement <4 x float> %9, i32 0
  call void @llvm.AMDGPU.store.output(float %21, i32 4)
  %22 = extractelement <4 x float> %9, i32 1
  call void @llvm.AMDGPU.store.output(float %22, i32 5)
  %23 = extractelement <4 x float> %9, i32 2
  call void @llvm.AMDGPU.store.output(float %23, i32 6)
  %24 = extractelement <4 x float> %9, i32 3
  call void @llvm.AMDGPU.store.output(float %24, i32 7)
  ret void
}
*** IR Dump Before Lower invoke and unwind, for unwindless code generators ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.R600.load.input(i32 4)
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = call float @llvm.R600.load.input(i32 5)
  %4 = insertelement <4 x float> %2, float %3, i32 1
  %5 = call float @llvm.R600.load.input(i32 6)
  %6 = insertelement <4 x float> %4, float %5, i32 2
  %7 = call float @llvm.R600.load.input(i32 7)
  %8 = insertelement <4 x float> %6, float %7, i32 3
  %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1>
  %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  %13 = fsub <2 x float> zeroinitializer, %10
  %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %17 = extractelement <4 x float> %16, i32 0
  call void @llvm.AMDGPU.store.output(float %17, i32 8)
  %18 = extractelement <4 x float> %16, i32 1
  call void @llvm.AMDGPU.store.output(float %18, i32 9)
  %19 = extractelement <4 x float> %16, i32 2
  call void @llvm.AMDGPU.store.output(float %19, i32 10)
  %20 = extractelement <4 x float> %16, i32 3
  call void @llvm.AMDGPU.store.output(float %20, i32 11)
  %21 = extractelement <4 x float> %9, i32 0
  call void @llvm.AMDGPU.store.output(float %21, i32 4)
  %22 = extractelement <4 x float> %9, i32 1
  call void @llvm.AMDGPU.store.output(float %22, i32 5)
  %23 = extractelement <4 x float> %9, i32 2
  call void @llvm.AMDGPU.store.output(float %23, i32 6)
  %24 = extractelement <4 x float> %9, i32 3
  call void @llvm.AMDGPU.store.output(float %24, i32 7)
  ret void
}
*** IR Dump Before Remove unreachable blocks from the CFG ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.R600.load.input(i32 4)
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = call float @llvm.R600.load.input(i32 5)
  %4 = insertelement <4 x float> %2, float %3, i32 1
  %5 = call float @llvm.R600.load.input(i32 6)
  %6 = insertelement <4 x float> %4, float %5, i32 2
  %7 = call float @llvm.R600.load.input(i32 7)
  %8 = insertelement <4 x float> %6, float %7, i32 3
  %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1>
  %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  %13 = fsub <2 x float> zeroinitializer, %10
  %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %17 = extractelement <4 x float> %16, i32 0
  call void @llvm.AMDGPU.store.output(float %17, i32 8)
  %18 = extractelement <4 x float> %16, i32 1
  call void @llvm.AMDGPU.store.output(float %18, i32 9)
  %19 = extractelement <4 x float> %16, i32 2
  call void @llvm.AMDGPU.store.output(float %19, i32 10)
  %20 = extractelement <4 x float> %16, i32 3
  call void @llvm.AMDGPU.store.output(float %20, i32 11)
  %21 = extractelement <4 x float> %9, i32 0
  call void @llvm.AMDGPU.store.output(float %21, i32 4)
  %22 = extractelement <4 x float> %9, i32 1
  call void @llvm.AMDGPU.store.output(float %22, i32 5)
  %23 = extractelement <4 x float> %9, i32 2
  call void @llvm.AMDGPU.store.output(float %23, i32 6)
  %24 = extractelement <4 x float> %9, i32 3
  call void @llvm.AMDGPU.store.output(float %24, i32 7)
  ret void
}
*** IR Dump Before Optimize for code generation ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.R600.load.input(i32 4)
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = call float @llvm.R600.load.input(i32 5)
  %4 = insertelement <4 x float> %2, float %3, i32 1
  %5 = call float @llvm.R600.load.input(i32 6)
  %6 = insertelement <4 x float> %4, float %5, i32 2
  %7 = call float @llvm.R600.load.input(i32 7)
  %8 = insertelement <4 x float> %6, float %7, i32 3
  %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1>
  %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  %13 = fsub <2 x float> zeroinitializer, %10
  %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %17 = extractelement <4 x float> %16, i32 0
  call void @llvm.AMDGPU.store.output(float %17, i32 8)
  %18 = extractelement <4 x float> %16, i32 1
  call void @llvm.AMDGPU.store.output(float %18, i32 9)
  %19 = extractelement <4 x float> %16, i32 2
  call void @llvm.AMDGPU.store.output(float %19, i32 10)
  %20 = extractelement <4 x float> %16, i32 3
  call void @llvm.AMDGPU.store.output(float %20, i32 11)
  %21 = extractelement <4 x float> %9, i32 0
  call void @llvm.AMDGPU.store.output(float %21, i32 4)
  %22 = extractelement <4 x float> %9, i32 1
  call void @llvm.AMDGPU.store.output(float %22, i32 5)
  %23 = extractelement <4 x float> %9, i32 2
  call void @llvm.AMDGPU.store.output(float %23, i32 6)
  %24 = extractelement <4 x float> %9, i32 3
  call void @llvm.AMDGPU.store.output(float %24, i32 7)
  ret void
}
*** IR Dump Before Insert stack protectors ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.R600.load.input(i32 4)
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = call float @llvm.R600.load.input(i32 5)
  %4 = insertelement <4 x float> %2, float %3, i32 1
  %5 = call float @llvm.R600.load.input(i32 6)
  %6 = insertelement <4 x float> %4, float %5, i32 2
  %7 = call float @llvm.R600.load.input(i32 7)
  %8 = insertelement <4 x float> %6, float %7, i32 3
  %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1>
  %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  %13 = fsub <2 x float> zeroinitializer, %10
  %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %17 = extractelement <4 x float> %16, i32 0
  call void @llvm.AMDGPU.store.output(float %17, i32 8)
  %18 = extractelement <4 x float> %16, i32 1
  call void @llvm.AMDGPU.store.output(float %18, i32 9)
  %19 = extractelement <4 x float> %16, i32 2
  call void @llvm.AMDGPU.store.output(float %19, i32 10)
  %20 = extractelement <4 x float> %16, i32 3
  call void @llvm.AMDGPU.store.output(float %20, i32 11)
  %21 = extractelement <4 x float> %9, i32 0
  call void @llvm.AMDGPU.store.output(float %21, i32 4)
  %22 = extractelement <4 x float> %9, i32 1
  call void @llvm.AMDGPU.store.output(float %22, i32 5)
  %23 = extractelement <4 x float> %9, i32 2
  call void @llvm.AMDGPU.store.output(float %23, i32 6)
  %24 = extractelement <4 x float> %9, i32 3
  call void @llvm.AMDGPU.store.output(float %24, i32 7)
  ret void
}
*** IR Dump Before Preliminary module verification ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.R600.load.input(i32 4)
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = call float @llvm.R600.load.input(i32 5)
  %4 = insertelement <4 x float> %2, float %3, i32 1
  %5 = call float @llvm.R600.load.input(i32 6)
  %6 = insertelement <4 x float> %4, float %5, i32 2
  %7 = call float @llvm.R600.load.input(i32 7)
  %8 = insertelement <4 x float> %6, float %7, i32 3
  %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1>
  %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  %13 = fsub <2 x float> zeroinitializer, %10
  %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %17 = extractelement <4 x float> %16, i32 0
  call void @llvm.AMDGPU.store.output(float %17, i32 8)
  %18 = extractelement <4 x float> %16, i32 1
  call void @llvm.AMDGPU.store.output(float %18, i32 9)
  %19 = extractelement <4 x float> %16, i32 2
  call void @llvm.AMDGPU.store.output(float %19, i32 10)
  %20 = extractelement <4 x float> %16, i32 3
  call void @llvm.AMDGPU.store.output(float %20, i32 11)
  %21 = extractelement <4 x float> %9, i32 0
  call void @llvm.AMDGPU.store.output(float %21, i32 4)
  %22 = extractelement <4 x float> %9, i32 1
  call void @llvm.AMDGPU.store.output(float %22, i32 5)
  %23 = extractelement <4 x float> %9, i32 2
  call void @llvm.AMDGPU.store.output(float %23, i32 6)
  %24 = extractelement <4 x float> %9, i32 3
  call void @llvm.AMDGPU.store.output(float %24, i32 7)
  ret void
}
*** IR Dump Before Module Verifier ***
define void @main() {
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  %1 = call float @llvm.R600.load.input(i32 4)
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = call float @llvm.R600.load.input(i32 5)
  %4 = insertelement <4 x float> %2, float %3, i32 1
  %5 = call float @llvm.R600.load.input(i32 6)
  %6 = insertelement <4 x float> %4, float %5, i32 2
  %7 = call float @llvm.R600.load.input(i32 7)
  %8 = insertelement <4 x float> %6, float %7, i32 3
  %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1>
  %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  %13 = fsub <2 x float> zeroinitializer, %10
  %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %17 = extractelement <4 x float> %16, i32 0
  call void @llvm.AMDGPU.store.output(float %17, i32 8)
  %18 = extractelement <4 x float> %16, i32 1
  call void @llvm.AMDGPU.store.output(float %18, i32 9)
  %19 = extractelement <4 x float> %16, i32 2
  call void @llvm.AMDGPU.store.output(float %19, i32 10)
  %20 = extractelement <4 x float> %16, i32 3
  call void @llvm.AMDGPU.store.output(float %20, i32 11)
  %21 = extractelement <4 x float> %9, i32 0
  call void @llvm.AMDGPU.store.output(float %21, i32 4)
  %22 = extractelement <4 x float> %9, i32 1
  call void @llvm.AMDGPU.store.output(float %22, i32 5)
  %23 = extractelement <4 x float> %9, i32 2
  call void @llvm.AMDGPU.store.output(float %23, i32 6)
  %24 = extractelement <4 x float> %9, i32 3
  call void @llvm.AMDGPU.store.output(float %24, i32 7)
  ret void
}
# *** IR Dump Before Expand ISel Pseudo-instructions ***:
# Machine code for function main: SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3
%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2
%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1
%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0
RESERVE_REG 0
%vreg4<def> = FNEG_R600 %vreg3; R600_Reg32:%vreg4 R600_TReg32:%vreg3
%vreg5<def> = MOV_IMM_F32 0.000000e+00; R600_Reg32:%vreg5
%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
%vreg7<def> = FNEG_R600 %vreg2; R600_Reg32:%vreg7 R600_TReg32:%vreg2
%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
%vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10
%vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6
RESERVE_REG 1
RESERVE_REG 2
%vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8
%vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13
%vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13
RESERVE_REG 3
%vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15
%vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15
%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14
%T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16
%T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17
%T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1
%T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0
RETURN

# End machine code for function main.

# *** IR Dump Before Tail Duplication ***:
# Machine code for function main: SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3
%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2
%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1
%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0
%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
%vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10
%vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6
%vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8
%vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13
%vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13
%vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15
%vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15
%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14
%T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16
%T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17
%T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1
%T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0
RETURN

# End machine code for function main.

# *** IR Dump Before Optimize machine instruction PHIs ***:
# Machine code for function main: SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3
%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2
%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1
%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0
%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
%vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10
%vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6
%vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8
%vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13
%vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13
%vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15
%vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15
%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14
%T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16
%T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17
%T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1
%T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0
RETURN

# End machine code for function main.

# *** IR Dump Before Slot index numbering ***:
# Machine code for function main: SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3
%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2
%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1
%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0
%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
%vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10
%vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6
%vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8
%vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13
%vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13
%vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15
%vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15
%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14
%T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16
%T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17
%T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1
%T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0
RETURN

# End machine code for function main.

# *** IR Dump Before Merge disjoint stack slots ***:
# Machine code for function main: SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

0B BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
16B %vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3
32B %vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2
48B %vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1
64B %vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0
80B %vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
96B %vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
112B %vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
128B %vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
144B %vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
160B %vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10
176B %vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6
192B %vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8
208B %vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13
224B %vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13
240B %vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15
256B %vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15
272B %T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
288B %vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
304B %T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
320B %vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14
336B %T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16
352B %T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17
368B %T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3
384B %T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
400B %T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1
416B %T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0
432B RETURN

# End machine code for function main.

# *** IR Dump Before Local Stack Slot Allocation ***:
# Machine code for function main: SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3
%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2
%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1
%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0
%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
%vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10
%vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6
%vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8
%vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13
%vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13
%vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15
%vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15
%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14
%T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16
%T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17
%T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1
%T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0
RETURN

# End machine code for function main.

# *** IR Dump Before Remove dead machine instructions ***:
# Machine code for function main: SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3
%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2
%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1
%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0
%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
%vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10
%vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6
%vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8
%vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13
%vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13
%vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15
%vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15
%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14
%T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16
%T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17
%T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1
%T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0
RETURN

# End machine code for function main.

# *** IR Dump Before Machine Loop Invariant Code Motion ***:
# Machine code for function main: SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3
%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2
%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1
%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0
%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
%vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10
%vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6
%vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8
%vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13
%vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13
%vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15
%vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15
%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14
%T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16
%T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17
%T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1
%T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0
RETURN

# End machine code for function main.

# *** IR Dump Before Machine Common Subexpression Elimination ***:
# Machine code for function main: SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3
%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2
%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1
%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0
%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
%vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10
%vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6
%vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8
%vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13
%vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13
%vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15
%vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15
%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14
%T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16
%T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17
%T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1
%T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0
RETURN

# End machine code for function main.

# *** IR Dump Before Machine code sinking ***:
# Machine code for function main: SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3
%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2
%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1
%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0
%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
%vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10
%vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6
%vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8
%vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13
%vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13
%vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15
%vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15
%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14
%T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16
%T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17
%T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1
%T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0
RETURN

# End machine code for function main.

# *** IR Dump Before Peephole Optimizations ***:
# Machine code for function main: SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3
%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2
%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1
%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0
%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
%vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10
%vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6
%vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8
%vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13
%vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13
%vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15
%vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15
%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14
%T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16
%T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17
%T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1
%T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0
RETURN

# End machine code for function main.

# *** IR Dump Before Process Implicit Definitions ***:
# Machine code for function main: SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3
%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2
%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1
%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0
%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
%vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10
%vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6
%vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8
%vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13
%vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13
%vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15
%vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15
%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14
%T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16
%T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17
%T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1
%T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0
RETURN

# End machine code for function main.

# *** IR Dump Before Remove unreachable machine basic blocks ***:
# Machine code for function main: SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3
%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2
%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1
%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0
%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
%vreg9<def,tied1> = INSERT_SUBREG %vreg10<undef,tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6
%vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8
%vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13<undef>, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13
%vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15<undef>, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15
%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14
%T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16
%T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17
%T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1
%T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0
RETURN

# End machine code for function main.

# *** IR Dump Before Live Variable Analysis ***:
# Machine code for function main: SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3
%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2
%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1
%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0
%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
%vreg9<def,tied1> = INSERT_SUBREG %vreg10<undef,tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6
%vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8
%vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13<undef>, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13
%vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15<undef>, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15
%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14
%T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16
%T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17
%T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1
%T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0
RETURN

# End machine code for function main.

# *** IR Dump Before Eliminate PHI nodes for register allocation ***:
# Machine code for function main: SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
%vreg3<def> = COPY %T1_X<kill>; R600_TReg32:%vreg3
%vreg2<def> = COPY %T1_Y<kill>; R600_TReg32:%vreg2
%vreg1<def> = COPY %T1_Z<kill>; R600_TReg32:%vreg1
%vreg0<def> = COPY %T1_W<kill>; R600_TReg32:%vreg0
%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
%vreg9<def,tied1> = INSERT_SUBREG %vreg10<undef,tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6
%vreg11<def,tied1> = INSERT_SUBREG %vreg9<kill,tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8
%vreg12<def,tied1> = INSERT_SUBREG %vreg11<kill,tied0>, %vreg13<undef>, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13
%vreg14<def,tied1> = INSERT_SUBREG %vreg12<kill,tied0>, %vreg15<undef>, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15
%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%vreg17<def> = COPY %vreg14:sel_y<kill>; R600_Reg32:%vreg17 R600_Reg128:%vreg14
%T2_Z<def> = COPY %vreg16<kill>; R600_Reg32:%vreg16
%T2_W<def> = COPY %vreg17<kill>; R600_Reg32:%vreg17
%T1_X<def> = COPY %vreg3<kill>; R600_TReg32:%vreg3
%T1_Y<def> = COPY %vreg2<kill>; R600_TReg32:%vreg2
%T1_Z<def> = COPY %vreg1<kill>; R600_TReg32:%vreg1
%T1_W<def> = COPY %vreg0<kill>; R600_TReg32:%vreg0
RETURN %T1_W<imp-use,kill>, %T1_Z<imp-use,kill>, %T1_Y<imp-use,kill>, %T1_X<imp-use,kill>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill>

# End machine code for function main.

# *** IR Dump Before Two-Address instruction pass ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
%vreg3<def> = COPY %T1_X<kill>; R600_TReg32:%vreg3
%vreg2<def> = COPY %T1_Y<kill>; R600_TReg32:%vreg2
%vreg1<def> = COPY %T1_Z<kill>; R600_TReg32:%vreg1
%vreg0<def> = COPY %T1_W<kill>; R600_TReg32:%vreg0
%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
%vreg9<def,tied1> = INSERT_SUBREG %vreg10<undef,tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6
%vreg11<def,tied1> = INSERT_SUBREG %vreg9<kill,tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8
%vreg12<def,tied1> = INSERT_SUBREG %vreg11<kill,tied0>, %vreg13<undef>, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13
%vreg14<def,tied1> = INSERT_SUBREG %vreg12<kill,tied0>, %vreg15<undef>, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15
%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%vreg17<def> = COPY %vreg14:sel_y<kill>; R600_Reg32:%vreg17 R600_Reg128:%vreg14
%T2_Z<def> = COPY %vreg16<kill>; R600_Reg32:%vreg16
%T2_W<def> = COPY %vreg17<kill>; R600_Reg32:%vreg17
%T1_X<def> = COPY %vreg3<kill>; R600_TReg32:%vreg3
%T1_Y<def> = COPY %vreg2<kill>; R600_TReg32:%vreg2
%T1_Z<def> = COPY %vreg1<kill>; R600_TReg32:%vreg1
%T1_W<def> = COPY %vreg0<kill>; R600_TReg32:%vreg0
RETURN %T1_W<imp-use,kill>, %T1_Z<imp-use,kill>, %T1_Y<imp-use,kill>, %T1_X<imp-use,kill>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill>

# End machine code for function main.

# *** IR Dump Before Slot index numbering ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
%vreg3<def> = COPY %T1_X<kill>; R600_TReg32:%vreg3
%vreg2<def> = COPY %T1_Y<kill>; R600_TReg32:%vreg2
%vreg1<def> = COPY %T1_Z<kill>; R600_TReg32:%vreg1
%vreg0<def> = COPY %T1_W<kill>; R600_TReg32:%vreg0
%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
%vreg9:sel_x<def,read-undef> = COPY %vreg6<kill>; R600_Reg128:%vreg9 R600_Reg32:%vreg6
%vreg11<def> = COPY %vreg9<kill>; R600_Reg128:%vreg11,%vreg9
%vreg11:sel_y<def> = COPY %vreg8<kill>; R600_Reg128:%vreg11 R600_Reg32:%vreg8
%vreg12<def> = COPY %vreg11<kill>; R600_Reg128:%vreg12,%vreg11
%vreg12:sel_z<def> = COPY %vreg13<undef>; R600_Reg128:%vreg12 R600_Reg32:%vreg13
%vreg14<def> = COPY %vreg12<kill>; R600_Reg128:%vreg14,%vreg12
%vreg14:sel_w<def> = COPY %vreg15<undef>; R600_Reg128:%vreg14 R600_Reg32:%vreg15
%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
%vreg17<def> = COPY %vreg14:sel_y<kill>; R600_Reg32:%vreg17 R600_Reg128:%vreg14
%T2_Z<def> = COPY %vreg16<kill>; R600_Reg32:%vreg16
%T2_W<def> = COPY %vreg17<kill>; R600_Reg32:%vreg17
%T1_X<def> = COPY %vreg3<kill>; R600_TReg32:%vreg3
%T1_Y<def> = COPY %vreg2<kill>; R600_TReg32:%vreg2
%T1_Z<def> = COPY %vreg1<kill>; R600_TReg32:%vreg1
%T1_W<def> = COPY %vreg0<kill>; R600_TReg32:%vreg0
RETURN %T1_W<imp-use,kill>, %T1_Z<imp-use,kill>, %T1_Y<imp-use,kill>, %T1_X<imp-use,kill>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill>

# End machine code for function main.

# *** IR Dump Before Live Interval Analysis ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

0BBB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
16B%vreg3<def> = COPY %T1_X<kill>; R600_TReg32:%vreg3
32B%vreg2<def> = COPY %T1_Y<kill>; R600_TReg32:%vreg2
48B%vreg1<def> = COPY %T1_Z<kill>; R600_TReg32:%vreg1
64B%vreg0<def> = COPY %T1_W<kill>; R600_TReg32:%vreg0
80B%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
96B%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
112B%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
128B%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
144B%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
160B%vreg9:sel_x<def,read-undef> = COPY %vreg6<kill>; R600_Reg128:%vreg9 R600_Reg32:%vreg6
176B%vreg11<def> = COPY %vreg9<kill>; R600_Reg128:%vreg11,%vreg9
192B%vreg11:sel_y<def> = COPY %vreg8<kill>; R600_Reg128:%vreg11 R600_Reg32:%vreg8
208B%vreg12<def> = COPY %vreg11<kill>; R600_Reg128:%vreg12,%vreg11
224B%vreg12:sel_z<def> = COPY %vreg13<undef>; R600_Reg128:%vreg12 R600_Reg32:%vreg13
240B%vreg14<def> = COPY %vreg12<kill>; R600_Reg128:%vreg14,%vreg12
256B%vreg14:sel_w<def> = COPY %vreg15<undef>; R600_Reg128:%vreg14 R600_Reg32:%vreg15
272B%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
288B%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
304B%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
320B%vreg17<def> = COPY %vreg14:sel_y<kill>; R600_Reg32:%vreg17 R600_Reg128:%vreg14
336B%T2_Z<def> = COPY %vreg16<kill>; R600_Reg32:%vreg16
352B%T2_W<def> = COPY %vreg17<kill>; R600_Reg32:%vreg17
368B%T1_X<def> = COPY %vreg3<kill>; R600_TReg32:%vreg3
384B%T1_Y<def> = COPY %vreg2<kill>; R600_TReg32:%vreg2
400B%T1_Z<def> = COPY %vreg1<kill>; R600_TReg32:%vreg1
416B%T1_W<def> = COPY %vreg0<kill>; R600_TReg32:%vreg0
432BRETURN %T1_W<imp-use,kill>, %T1_Z<imp-use,kill>, %T1_Y<imp-use,kill>, %T1_X<imp-use,kill>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill>

# End machine code for function main.

********** COMPUTING LIVE INTERVALS **********
********** Function: main
BB#0:# derived from 
16B%vreg3<def> = COPY %T1_X<kill>; R600_TReg32:%vreg3
register: %vreg3 +[16r,368r:0)
32B%vreg2<def> = COPY %T1_Y<kill>; R600_TReg32:%vreg2
register: %vreg2 +[32r,384r:0)
48B%vreg1<def> = COPY %T1_Z<kill>; R600_TReg32:%vreg1
register: %vreg1 +[48r,400r:0)
64B%vreg0<def> = COPY %T1_W<kill>; R600_TReg32:%vreg0
register: %vreg0 +[64r,416r:0)
80B%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
register: %vreg4 +[80r,112r:0)
96B%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
register: %vreg5 +[96r,144r:0)
112B%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
register: %vreg6 +[112r,160r:0)
128B%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
register: %vreg7 +[128r,144r:0)
144B%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
register: %vreg8 +[144r,192r:0)
160B%vreg9:sel_x<def,read-undef> = COPY %vreg6<kill>; R600_Reg128:%vreg9 R600_Reg32:%vreg6
register: %vreg9 +[160r,176r:0)
176B%vreg11<def> = COPY %vreg9<kill>; R600_Reg128:%vreg11,%vreg9
register: %vreg11 +[176r,208r:0)
192B%vreg11:sel_y<def> = COPY %vreg8<kill>; R600_Reg128:%vreg11 R600_Reg32:%vreg8
register: %vreg11 replace range with [176r,192r:1) RESULT: [176r,192r:1)[192r,208r:0)  0 at 192r 1 at 176r
208B%vreg12<def> = COPY %vreg11<kill>; R600_Reg128:%vreg12,%vreg11
register: %vreg12 +[208r,240r:0)
224B%vreg12:sel_z<def> = COPY %vreg13<undef>; R600_Reg128:%vreg12 R600_Reg32:%vreg13
register: %vreg12 replace range with [208r,224r:1) RESULT: [208r,224r:1)[224r,240r:0)  0 at 224r 1 at 208r
240B%vreg14<def> = COPY %vreg12<kill>; R600_Reg128:%vreg14,%vreg12
register: %vreg14 +[240r,320r:0)
256B%vreg14:sel_w<def> = COPY %vreg15<undef>; R600_Reg128:%vreg14 R600_Reg32:%vreg15
register: %vreg14 replace range with [240r,256r:1) RESULT: [240r,256r:1)[256r,320r:0)  0 at 256r 1 at 240r
272B%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
288B%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
register: %vreg16 +[288r,336r:0)
304B%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
320B%vreg17<def> = COPY %vreg14:sel_y<kill>; R600_Reg32:%vreg17 R600_Reg128:%vreg14
register: %vreg17 +[320r,352r:0)
336B%T2_Z<def> = COPY %vreg16<kill>; R600_Reg32:%vreg16
352B%T2_W<def> = COPY %vreg17<kill>; R600_Reg32:%vreg17
368B%T1_X<def> = COPY %vreg3<kill>; R600_TReg32:%vreg3
384B%T1_Y<def> = COPY %vreg2<kill>; R600_TReg32:%vreg2
400B%T1_Z<def> = COPY %vreg1<kill>; R600_TReg32:%vreg1
416B%T1_W<def> = COPY %vreg0<kill>; R600_TReg32:%vreg0
432BRETURN %T1_W<imp-use,kill>, %T1_Z<imp-use,kill>, %T1_Y<imp-use,kill>, %T1_X<imp-use,kill>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill>
Computing live-in reg-units in ABI blocks.
0B BB#0 T1_W#0 T1_Z#0 T1_Y#0 T1_X#0
Created 4 new intervals.
********** INTERVALS **********
T1_W = [0B,64r:0)[416r,432r:1)  0 at 0B-phi 1 at 416r
T1_X = [0B,16r:0)[368r,432r:1)  0 at 0B-phi 1 at 368r
T1_Y = [0B,32r:0)[384r,432r:1)  0 at 0B-phi 1 at 384r
T1_Z = [0B,48r:0)[400r,432r:1)  0 at 0B-phi 1 at 400r
%vreg0 = [64r,416r:0)  0 at 64r
%vreg1 = [48r,400r:0)  0 at 48r
%vreg2 = [32r,384r:0)  0 at 32r
%vreg3 = [16r,368r:0)  0 at 16r
%vreg4 = [80r,112r:0)  0 at 80r
%vreg5 = [96r,144r:0)  0 at 96r
%vreg6 = [112r,160r:0)  0 at 112r
%vreg7 = [128r,144r:0)  0 at 128r
%vreg8 = [144r,192r:0)  0 at 144r
%vreg9 = [160r,176r:0)  0 at 160r
%vreg11 = [176r,192r:1)[192r,208r:0)  0 at 192r 1 at 176r
%vreg12 = [208r,224r:1)[224r,240r:0)  0 at 224r 1 at 208r
%vreg13 = EMPTY
%vreg14 = [240r,256r:1)[256r,320r:0)  0 at 256r 1 at 240r
%vreg15 = EMPTY
%vreg16 = [288r,336r:0)  0 at 288r
%vreg17 = [320r,352r:0)  0 at 320r
********** MACHINEINSTRS **********
# Machine code for function main: Post SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

0B BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
16B%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3
32B%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2
48B%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1
64B%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0
80B%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
96B%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
112B%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
128B%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
144B%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
160B%vreg9:sel_x<def,read-undef> = COPY %vreg6<kill>; R600_Reg128:%vreg9 R600_Reg32:%vreg6
176B%vreg11<def> = COPY %vreg9<kill>; R600_Reg128:%vreg11,%vreg9
192B%vreg11:sel_y<def> = COPY %vreg8<kill>; R600_Reg128:%vreg11 R600_Reg32:%vreg8
208B%vreg12<def> = COPY %vreg11<kill>; R600_Reg128:%vreg12,%vreg11
224B%vreg12:sel_z<def> = COPY %vreg13<undef>; R600_Reg128:%vreg12 R600_Reg32:%vreg13
240B%vreg14<def> = COPY %vreg12<kill>; R600_Reg128:%vreg14,%vreg12
256B%vreg14:sel_w<def> = COPY %vreg15<undef>; R600_Reg128:%vreg14 R600_Reg32:%vreg15
272B%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
288B%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
304B%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
320B%vreg17<def> = COPY %vreg14:sel_y<kill>; R600_Reg32:%vreg17 R600_Reg128:%vreg14
336B%T2_Z<def> = COPY %vreg16<kill>; R600_Reg32:%vreg16
352B%T2_W<def> = COPY %vreg17<kill>; R600_Reg32:%vreg17
368B%T1_X<def> = COPY %vreg3<kill>; R600_TReg32:%vreg3
384B%T1_Y<def> = COPY %vreg2<kill>; R600_TReg32:%vreg2
400B%T1_Z<def> = COPY %vreg1<kill>; R600_TReg32:%vreg1
416B%T1_W<def> = COPY %vreg0<kill>; R600_TReg32:%vreg0
432BRETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill>

# End machine code for function main.

# *** IR Dump Before Debug Variable Analysis ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

0B BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
16B%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3
32B%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2
48B%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1
64B%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0
80B%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
96B%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
112B%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
128B%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
144B%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
160B%vreg9:sel_x<def,read-undef> = COPY %vreg6<kill>; R600_Reg128:%vreg9 R600_Reg32:%vreg6
176B%vreg11<def> = COPY %vreg9<kill>; R600_Reg128:%vreg11,%vreg9
192B%vreg11:sel_y<def> = COPY %vreg8<kill>; R600_Reg128:%vreg11 R600_Reg32:%vreg8
208B%vreg12<def> = COPY %vreg11<kill>; R600_Reg128:%vreg12,%vreg11
224B%vreg12:sel_z<def> = COPY %vreg13<undef>; R600_Reg128:%vreg12 R600_Reg32:%vreg13
240B%vreg14<def> = COPY %vreg12<kill>; R600_Reg128:%vreg14,%vreg12
256B%vreg14:sel_w<def> = COPY %vreg15<undef>; R600_Reg128:%vreg14 R600_Reg32:%vreg15
272B%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
288B%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
304B%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
320B%vreg17<def> = COPY %vreg14:sel_y<kill>; R600_Reg32:%vreg17 R600_Reg128:%vreg14
336B%T2_Z<def> = COPY %vreg16<kill>; R600_Reg32:%vreg16
352B%T2_W<def> = COPY %vreg17<kill>; R600_Reg32:%vreg17
368B%T1_X<def> = COPY %vreg3<kill>; R600_TReg32:%vreg3
384B%T1_Y<def> = COPY %vreg2<kill>; R600_TReg32:%vreg2
400B%T1_Z<def> = COPY %vreg1<kill>; R600_TReg32:%vreg1
416B%T1_W<def> = COPY %vreg0<kill>; R600_TReg32:%vreg0
432BRETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill>

# End machine code for function main.

# *** IR Dump Before Simple Register Coalescing ***:
# Machine code for function main: Post SSA
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3
Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X

0B BB#0: derived from LLVM BB %0
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
16B%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3
32B%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2
48B%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1
64B%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0
80B%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3
96B%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5
112B%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5
128B%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2
144B%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5
160B%vreg9:sel_x<def,read-undef> = COPY %vreg6<kill>; R600_Reg128:%vreg9 R600_Reg32:%vreg6
176B%vreg11<def> = COPY %vreg9<kill>; R600_Reg128:%vreg11,%vreg9
192B%vreg11:sel_y<def> = COPY %vreg8<kill>; R600_Reg128:%vreg11 R600_Reg32:%vreg8
208B%vreg12<def> = COPY %vreg11<kill>; R600_Reg128:%vreg12,%vreg11
224B%vreg12:sel_z<def> = COPY %vreg13<undef>; R600_Reg128:%vreg12 R600_Reg32:%vreg13
240B%vreg14<def> = COPY %vreg12<kill>; R600_Reg128:%vreg14,%vreg12
256B%vreg14:sel_w<def> = COPY %vreg15<undef>; R600_Reg128:%vreg14 R600_Reg32:%vreg15
272B%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3
288B%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14
304B%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2
320B%vreg17<def> = COPY %vreg14:sel_y<kill>; R600_Reg32:%vreg17 R600_Reg128:%vreg14
336B%T2_Z<def> = COPY %vreg16<kill>; R600_Reg32:%vreg16
352B%T2_W<def> = COPY %vreg17<kill>; R600_Reg32:%vreg17
368B%T1_X<def> = COPY %vreg3<kill>; R600_TReg32:%vreg3
384B%T1_Y<def> = COPY %vreg2<kill>; R600_TReg32:%vreg2
400B%T1_Z<def> = COPY %vreg1<kill>; R600_TReg32:%vreg1
416B%T1_W<def> = COPY %vreg0<kill>; R600_TReg32:%vreg0
432BRETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill>

# End machine code for function main.

********** SIMPLE REGISTER COALESCING **********
********** Function: main
********** JOINING INTERVALS ***********
:
16B%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3
Considering merging %vreg3 with %T1_X
Can only merge into reserved registers.
32B%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2
Considering merging %vreg2 with %T1_Y
Can only merge into reserved registers.
48B%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1
Considering merging %vreg1 with %T1_Z
Can only merge into reserved registers.
64B%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0
Considering merging %vreg0 with %T1_W
Can only merge into reserved registers.
160B%vreg9:sel_x<def,read-undef> = COPY %vreg6<kill>; R600_Reg128:%vreg9 R600_Reg32:%vreg6
Considering merging to R600_Reg128 with %vreg6 in %vreg9:sel_x
RHS = %vreg6 [112r,160r:0)  0 at 112r
LHS = %vreg9 [160r,176r:0)  0 at 160r
merge %vreg9:0 at 160r into %vreg6:0 at 112r --> @112r
erased:160r%vreg9:sel_x<def,read-undef> = COPY %vreg6<kill>; R600_Reg128:%vreg9 R600_Reg32:%vreg6
AllocationOrder(R600_Reg128) = [ %T0_XYZW %T1_XYZW %T2_XYZW %T3_XYZW %T4_XYZW %T5_XYZW %T6_XYZW %T7_XYZW %T8_XYZW %T9_XYZW %T10_XYZW %T11_XYZW %T12_XYZW %T13_XYZW %T14_XYZW %T15_XYZW %T16_XYZW %T17_XYZW %T18_XYZW %T19_XYZW %T20_XYZW %T21_XYZW %T22_XYZW %T23_XYZW %T24_XYZW %T25_XYZW %T26_XYZW %T27_XYZW %T28_XYZW %T29_XYZW %T30_XYZW %T31_XYZW %T32_XYZW %T33_XYZW %T34_XYZW %T35_XYZW %T36_XYZW %T37_XYZW %T38_XYZW %T39_XYZW %T40_XYZW %T41_XYZW %T42_XYZW %T43_XYZW %T44_XYZW %T45_XYZW %T46_XYZW %T47_XYZW %T48_XYZW %T49_XYZW %T50_XYZW %T51_XYZW %T52_XYZW %T53_XYZW %T54_XYZW %T55_XYZW %T56_XYZW %T57_XYZW %T58_XYZW %T59_XYZW %T60_XYZW %T61_XYZW %T62_XYZW %T63_XYZW %T64_XYZW %T65_XYZW %T66_XYZW %T67_XYZW %T68_XYZW %T69_XYZW %T70_XYZW %T71_XYZW %T72_XYZW %T73_XYZW %T74_XYZW %T75_XYZW %T76_XYZW %T77_XYZW %T78_XYZW %T79_XYZW %T80_XYZW %T81_XYZW %T82_XYZW %T83_XYZW %T84_XYZW %T85_XYZW %T86_XYZW %T87_XYZW %T88_XYZW %T89_XYZW %T90_XYZW %T91_XYZW %T92_XYZW %T93_XYZW %T94_XYZW %T95_XYZW %T96_XYZW %T97_XYZW %T98_XYZW %T99_XYZW %T100_XYZW %T101_XYZW %T102_XYZW %T103_XYZW %T104_XYZW %T105_XYZW %T106_XYZW %T107_XYZW %T108_XYZW %T109_XYZW %T110_XYZW %T111_XYZW %T112_XYZW %T113_XYZW %T114_XYZW %T115_XYZW %T116_XYZW %T117_XYZW %T118_XYZW %T119_XYZW %T120_XYZW %T121_XYZW %T122_XYZW %T123_XYZW %T124_XYZW %T125_XYZW %T126_XYZW %T127_XYZW ]
updated: 112B%vreg9:sel_x<def,read-undef> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg128:%vreg9 R600_Reg32:%vreg4,%vreg5
Joined. Result = %vreg9[112r,176r:0)  0 at 112r
176B%vreg11<def> = COPY %vreg9; R600_Reg128:%vreg11,%vreg9
Considering merging to R600_Reg128 with %vreg9 in %vreg11
RHS = %vreg9 [112r,176r:0)  0 at 112r
LHS = %vreg11 [176r,192r:1)[192r,208r:0)  0 at 192r 1 at 176r
merge %vreg11:1 at 176r into %vreg9:0 at 112r --> @112r
erased:176r%vreg11<def> = COPY %vreg9; R600_Reg128:%vreg11,%vreg9
updated: 112B%vreg11:sel_x<def,read-undef> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg128:%vreg11 R600_Reg32:%vreg4,%vreg5
Joined. Result = %vreg11[112r,192r:0)[192r,208r:1)  0 at 112r 1 at 192r
192B%vreg11:sel_y<def> = COPY %vreg8<kill>; R600_Reg128:%vreg11 R600_Reg32:%vreg8
Considering merging to R600_Reg128 with %vreg8 in %vreg11:sel_y
RHS = %vreg8 [144r,192r:0)  0 at 144r
LHS = %vreg11 [112r,192r:0)[192r,208r:1)  0 at 112r 1 at 192r
merge %vreg11:1 at 192r into %vreg8:0 at 144r --> @144r
pruned %vreg11 at 144r: [112r,144r:0)[192r,208r:1)  0 at 112r 1 at 192r
erased:192r%vreg11:sel_y<def> = COPY %vreg8<kill>; R600_Reg128:%vreg11 R600_Reg32:%vreg8
restoring liveness to 2 points: [112r,144r:0)[144r,208r:1)  0 at 112r 1 at 144r
updated: 144B%vreg11:sel_y<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg128:%vreg11 R600_Reg32:%vreg7,%vreg5
Joined. Result = %vreg11[112r,144r:0)[144r,208r:1)  0 at 112r 1 at 144r
208B%vreg12<def> = COPY %vreg11; R600_Reg128:%vreg12,%vreg11
Considering merging to R600_Reg128 with %vreg11 in %vreg12
RHS = %vreg11 [112r,144r:0)[144r,208r:1)  0 at 112r 1 at 144r
LHS = %vreg12 [208r,224r:1)[224r,240r:0)  0 at 224r 1 at 208r
merge %vreg12:1 at 208r into %vreg11:1 at 144r --> @144r
erased:208r%vreg12<def> = COPY %vreg11; R600_Reg128:%vreg12,%vreg11
updated: 144B%vreg12:sel_y<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg128:%vreg12 R600_Reg32:%vreg7,%vreg5
updated: 112B%vreg12:sel_x<def,read-undef> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg128:%vreg12 R600_Reg32:%vreg4,%vreg5
Joined. Result = %vreg12[112r,144r:0)[144r,224r:1)[224r,240r:2)  0 at 112r 1 at 144r 2 at 224r
224B%vreg12:sel_z<def> = COPY %vreg13<undef>; R600_Reg128:%vreg12 R600_Reg32:%vreg13
Considering merging to R600_Reg128 with %vreg13 in %vreg12:sel_z
RHS = %vreg13 EMPTY
LHS = %vreg12 [112r,144r:0)[144r,224r:1)[224r,240r:2)  0 at 112r 1 at 144r 2 at 224r
llc: /home/vlj/llvm/include/llvm/ADT/SmallVector.h:143: const T& llvm::SmallVectorTemplateCommon<T, <template-parameter-1-2> >::operator[](unsigned int) const [with T = int; <template-parameter-1-2> = void; llvm::SmallVectorTemplateCommon<T, <template-parameter-1-2> >::const_reference = const int&]: Assertion `begin() + idx < end()' failed.
0  llc             0x00000000014c4020
1  llc             0x00000000014c3cf6
2  libpthread.so.0 0x0000003dd820efe0
3  libc.so.6       0x0000003dd7e35925 gsignal + 53
4  libc.so.6       0x0000003dd7e370d8 abort + 328
5  libc.so.6       0x0000003dd7e2e6a2
6  libc.so.6       0x0000003dd7e2e752
7  llc             0x000000000094760b
8  llc             0x00000000010c8923
9  llc             0x00000000010caef9
10 llc             0x00000000010cb0e6
11 llc             0x00000000010c7f54
12 llc             0x00000000010cb2d0
13 llc             0x00000000010cb497
14 llc             0x00000000010cb618
15 llc             0x00000000010cba3d
16 llc             0x000000000102e90d llvm::MachineFunctionPass::runOnFunction(llvm::Function&) + 95
17 llc             0x00000000013ea521 llvm::FPPassManager::runOnFunction(llvm::Function&) + 383
18 llc             0x00000000013ea734 llvm::FPPassManager::runOnModule(llvm::Module&) + 102
19 llc             0x00000000013eaa5c llvm::MPPassManager::runOnModule(llvm::Module&) + 442
20 llc             0x00000000013eaf74 llvm::PassManagerImpl::run(llvm::Module&) + 120
21 llc             0x00000000013eb127 llvm::PassManager::run(llvm::Module&) + 39
22 llc             0x0000000000816ff3 main + 4244
23 libc.so.6       0x0000003dd7e21735 __libc_start_main + 245
24 llc             0x0000000000815989
Stack dump:
0.Program arguments: llc -march=r600 -mcpu=cayman -print-before-all -debug-only=regalloc /home/vlj/shader 
1.Running pass 'Function Pass Manager' on module '/home/vlj/shader'.
2.Running pass 'Simple Register Coalescing' on function '@main'




More information about the llvm-dev mailing list