[llvm] r248478 - Introduce target hook for optimizing register copies

Mon Sep 28 01:13:59 PDT 2015

Hi Matt,

On 24.09.2015 17:36, Matt Arsenault via llvm-commits wrote:
> Author: arsenm
> Date: Thu Sep 24 03:36:14 2015
> New Revision: 248478
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=248478&view=rev
> Log:
> Introduce target hook for optimizing register copies

This change broke the piglit (graphics) test
spec@glsl-1.30@execution@varying-packing-mixed-types, which can be run
manually as

.../piglit/bin/shader_runner .../piglit/tests/spec/glsl-1.30/execution/varying-packing-mixed-types.shader_test -auto

See the assertion failure output and backtrace below and the
attached failing IR generated by the test.

shader_runner: /home/daenzer/src/llvm-git/llvm/lib/CodeGen/RegisterScavenging.cpp:418: unsigned int llvm::RegScavenger::scavengeRegister(const llvm::TargetRegisterClass*, llvm::MachineBasicBlock::iterator, int): Assertion `Scavenged[SI].FrameIndex >= 0 && "Cannot scavenge register without an emergency spill slot!"' failed.

Program received signal SIGABRT, Aborted.
0x00007ffff539b107 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
56	../nptl/sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0  0x00007ffff539b107 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
#1  0x00007ffff539c4e8 in __GI_abort () at abort.c:89
#2  0x00007ffff5394226 in __assert_fail_base (fmt=0x7ffff54cad08 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=assertion@entry=0x7fffeea159b8 "Scavenged[SI].FrameIndex >= 0 && \"Cannot scavenge register without an emergency spill slot!\"", 
    file=file@entry=0x7fffeea15690 "/home/daenzer/src/llvm-git/llvm/lib/CodeGen/RegisterScavenging.cpp", line=line@entry=418, 
    function=function@entry=0x7fffeea15cc0 <llvm::RegScavenger::scavengeRegister(llvm::TargetRegisterClass const*, llvm::MachineBasicBlock::bundle_iterator<llvm::MachineInstr, llvm::ilist_iterator<llvm::MachineInstr> >, int)::__PRETTY_FUNCTION__> "unsigned int llvm::RegScavenger::scavengeRegister(const llvm::TargetRegisterClass*, llvm::MachineBasicBlock::iterator, int)") at assert.c:92
#3  0x00007ffff53942d2 in __GI___assert_fail (assertion=assertion@entry=0x7fffeea159b8 "Scavenged[SI].FrameIndex >= 0 && \"Cannot scavenge register without an emergency spill slot!\"", 
    file=file@entry=0x7fffeea15690 "/home/daenzer/src/llvm-git/llvm/lib/CodeGen/RegisterScavenging.cpp", line=line@entry=418, 
    function=function@entry=0x7fffeea15cc0 <llvm::RegScavenger::scavengeRegister(llvm::TargetRegisterClass const*, llvm::MachineBasicBlock::bundle_iterator<llvm::MachineInstr, llvm::ilist_iterator<llvm::MachineInstr> >, int)::__PRETTY_FUNCTION__> "unsigned int llvm::RegScavenger::scavengeRegister(const llvm::TargetRegisterClass*, llvm::MachineBasicBlock::iterator, int)") at assert.c:101
#4  0x00007fffedfa6cbe in llvm::RegScavenger::scavengeRegister (this=this@entry=0xedea50, RC=0x7fffef0a5280 <llvm::AMDGPU::VGPR_32RegClass>, I=..., SPAdj=SPAdj@entry=0) at /home/daenzer/src/llvm-git/llvm/lib/CodeGen/RegisterScavenging.cpp:417
#5  0x00007fffee239a5f in llvm::SIRegisterInfo::eliminateFrameIndex (this=0x790a20, MI=..., SPAdj=0, FIOperandNum=1, RS=0xedea50) at /home/daenzer/src/llvm-git/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp:315
#6  0x00007fffedf5b858 in (anonymous namespace)::PEI::replaceFrameIndices (BB=BB@entry=0xd72020, Fn=..., SPAdj=@0x7fffffff5490: 0, this=0xd31840, this=0xd31840) at /home/daenzer/src/llvm-git/llvm/lib/CodeGen/PrologEpilogInserter.cpp:953
#7  0x00007fffedf5f5fb in replaceFrameIndices (Fn=..., this=<optimized out>) at /home/daenzer/src/llvm-git/llvm/lib/CodeGen/PrologEpilogInserter.cpp:851
#8  (anonymous namespace)::PEI::runOnMachineFunction (this=<optimized out>, Fn=...) at /home/daenzer/src/llvm-git/llvm/lib/CodeGen/PrologEpilogInserter.cpp:221
#9  0x00007fffedb57eb9 in llvm::FPPassManager::runOnFunction (this=0xcc10a0, F=...) at /home/daenzer/src/llvm-git/llvm/lib/IR/LegacyPassManager.cpp:1528
#10 0x00007fffedb5826b in llvm::FPPassManager::runOnModule (this=0xcc10a0, M=...) at /home/daenzer/src/llvm-git/llvm/lib/IR/LegacyPassManager.cpp:1549
#11 0x00007fffedb57af4 in runOnModule (M=..., this=<optimized out>) at /home/daenzer/src/llvm-git/llvm/lib/IR/LegacyPassManager.cpp:1605
#12 llvm::legacy::PassManagerImpl::run (this=0xcc9e20, M=...) at /home/daenzer/src/llvm-git/llvm/lib/IR/LegacyPassManager.cpp:1708
#13 0x00007fffedb57cce in llvm::legacy::PassManager::run (this=this@entry=0x7fffffff5720, M=...) at /home/daenzer/src/llvm-git/llvm/lib/IR/LegacyPassManager.cpp:1739
#14 0x00007fffedc41861 in LLVMTargetMachineEmit (T=T@entry=0x78f8d0, M=M@entry=0xcbb7e0, OS=..., codegen=codegen@entry=LLVMObjectFile, ErrorMessage=ErrorMessage@entry=0x7fffffff5a00) at /home/daenzer/src/llvm-git/llvm/lib/Target/TargetMachineC.cpp:216
#15 0x00007fffedc41bec in LLVMTargetMachineEmitToMemoryBuffer (T=T@entry=0x78f8d0, M=M@entry=0xcbb7e0, codegen=codegen@entry=LLVMObjectFile, ErrorMessage=ErrorMessage@entry=0x7fffffff5a00, OutMemBuf=OutMemBuf@entry=0x7fffffff5a08)
    at /home/daenzer/src/llvm-git/llvm/lib/Target/TargetMachineC.cpp:240
#16 0x00007ffff03e3d2f in radeon_llvm_compile (M=M@entry=0xcbb7e0, binary=binary@entry=0xcd5990, gpu_family=<optimized out>, dump_ir=dump_ir@entry=false, dump_asm=dump_asm@entry=false, tm=tm@entry=0x78f8d0)
    at ../../../../../src/gallium/drivers/radeon/radeon_llvm_emit.c:183
#17 0x00007ffff032bc9e in si_compile_llvm (sscreen=sscreen@entry=0x6900e0, shader=shader@entry=0xcd5960, tm=tm@entry=0x78f8d0, mod=0xcbb7e0) at ../../../../../src/gallium/drivers/radeonsi/si_shader.c:3818
#18 0x00007ffff032c9a0 in si_shader_create (sscreen=0x6900e0, tm=0x78f8d0, shader=shader@entry=0xcd5960) at ../../../../../src/gallium/drivers/radeonsi/si_shader.c:4119
#19 0x00007ffff033a169 in si_shader_select (ctx=0x63d540, sel=0xcd2830) at ../../../../../src/gallium/drivers/radeonsi/si_state_shaders.c:644
#20 0x00007ffff033bdf7 in si_update_shaders (sctx=sctx@entry=0x63d540) at ../../../../../src/gallium/drivers/radeonsi/si_state_shaders.c:1494
#21 0x00007ffff0336bde in si_draw_vbo (ctx=0x63d540, info=0x7fffffffdf80) at ../../../../../src/gallium/drivers/radeonsi/si_state_draw.c:762
#22 0x00007ffff00ff231 in u_vbuf_draw_vbo (mgr=0x7fc720, info=<optimized out>) at ../../../../src/gallium/auxiliary/util/u_vbuf.c:1293
#23 0x00007fffefeaac2b in st_draw_vbo (ctx=0x79a6c0, prims=<optimized out>, nr_prims=<optimized out>, ib=0x0, index_bounds_valid=<optimized out>, min_index=0, max_index=3, tfb_vertcount=0x0, stream=0, indirect=0x0)
    at ../../../src/mesa/state_tracker/st_draw.c:291
#24 0x00007fffefe6f3ca in vbo_draw_arrays (ctx=0x79a6c0, mode=5, start=0, count=4, numInstances=1, baseInstance=0) at ../../../src/mesa/vbo/vbo_exec_array.c:645
#25 0x00007ffff7ab8a8b in stub_glDrawArrays (mode=5, first=0, count=4) at tests/util/piglit-dispatch-gen.c:11317
#26 0x00007ffff7b20024 in piglit_draw_rect_from_arrays (verts=0x7fffffffe330, tex=0x0, use_patches=false) at tests/util/piglit-util-gl.c:707
#27 0x00007ffff7b20427 in piglit_draw_rect_custom (x=-1, y=-1, w=2, h=2, use_patches=false) at tests/util/piglit-util-gl.c:820
#28 0x00007ffff7b20473 in piglit_draw_rect (x=-1, y=-1, w=2, h=2) at tests/util/piglit-util-gl.c:829
#29 0x000000000040afb7 in piglit_display () at tests/shaders/shader_runner.c:2699
#30 0x00007ffff7b4413a in run_test (gl_fw=0x624c20, argc=2, argv=0x7fffffffe6f8) at tests/util/piglit-framework-gl/piglit_winsys_framework.c:79
#31 0x00007ffff7b28f47 in piglit_gl_test_run (argc=2, argv=0x7fffffffe6f8, config=0x7fffffffe5c0) at tests/util/piglit-framework-gl.c:199
#32 0x0000000000405631 in main (argc=2, argv=0x7fffffffe6f8) at tests/shaders/shader_runner.c:54

-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
-------------- next part --------------
; ModuleID = 'tgsi'

define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
  %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
  %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
  %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
  %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
  %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !tbaa !0
  %18 = add i32 %5, %7
  %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %17, i32 0, i32 %18)
  %20 = extractelement <4 x float> %19, i32 0
  %21 = extractelement <4 x float> %19, i32 1
  %22 = extractelement <4 x float> %19, i32 2
  %23 = extractelement <4 x float> %19, i32 3
  %24 = bitcast float %15 to i32
  br label %LOOP

LOOP:                                             ; preds = %ENDIF, %main_body
  %temp248.0 = phi float [ 0.000000e+00, %main_body ], [ %217, %ENDIF ]
  %temp244.0 = phi float [ 0.000000e+00, %main_body ], [ %216, %ENDIF ]
  %temp240.0 = phi float [ 0.000000e+00, %main_body ], [ %215, %ENDIF ]
  %temp252.0 = phi float [ 0.000000e+00, %main_body ], [ %218, %ENDIF ]
  %temp236.0 = phi float [ 0.000000e+00, %main_body ], [ %214, %ENDIF ]
  %temp232.0 = phi float [ 0.000000e+00, %main_body ], [ %213, %ENDIF ]
  %temp228.0 = phi float [ 0.000000e+00, %main_body ], [ %212, %ENDIF ]
  %temp256.0 = phi float [ 0.000000e+00, %main_body ], [ %219, %ENDIF ]
  %temp224.0 = phi float [ 0.000000e+00, %main_body ], [ %211, %ENDIF ]
  %temp220.0 = phi float [ 0.000000e+00, %main_body ], [ %210, %ENDIF ]
  %temp216.0 = phi float [ 0.000000e+00, %main_body ], [ %209, %ENDIF ]
  %temp260.0 = phi float [ 0.000000e+00, %main_body ], [ %220, %ENDIF ]
  %temp212.0 = phi float [ 0.000000e+00, %main_body ], [ %208, %ENDIF ]
  %temp208.0 = phi float [ 0.000000e+00, %main_body ], [ %207, %ENDIF ]
  %temp204.0 = phi float [ 0.000000e+00, %main_body ], [ %206, %ENDIF ]
  %temp264.0 = phi float [ 0.000000e+00, %main_body ], [ %221, %ENDIF ]
  %temp200.0 = phi float [ 0.000000e+00, %main_body ], [ %205, %ENDIF ]
  %temp196.0 = phi float [ 0.000000e+00, %main_body ], [ %204, %ENDIF ]
  %temp192.0 = phi float [ 0.000000e+00, %main_body ], [ %203, %ENDIF ]
  %temp268.0 = phi float [ 0.000000e+00, %main_body ], [ %222, %ENDIF ]
  %temp188.0 = phi float [ 0.000000e+00, %main_body ], [ %202, %ENDIF ]
  %temp184.0 = phi float [ 0.000000e+00, %main_body ], [ %201, %ENDIF ]
  %temp180.0 = phi float [ 0.000000e+00, %main_body ], [ %200, %ENDIF ]
  %temp272.0 = phi float [ 0.000000e+00, %main_body ], [ %223, %ENDIF ]
  %temp176.0 = phi float [ 0.000000e+00, %main_body ], [ %199, %ENDIF ]
  %temp172.0 = phi float [ 0.000000e+00, %main_body ], [ %198, %ENDIF ]
  %temp168.0 = phi float [ 0.000000e+00, %main_body ], [ %197, %ENDIF ]
  %temp276.0 = phi float [ 0.000000e+00, %main_body ], [ %224, %ENDIF ]
  %temp164.0 = phi float [ 0.000000e+00, %main_body ], [ %196, %ENDIF ]
  %temp160.0 = phi float [ 0.000000e+00, %main_body ], [ %195, %ENDIF ]
  %temp156.0 = phi float [ 0.000000e+00, %main_body ], [ %194, %ENDIF ]
  %temp280.0 = phi float [ 0.000000e+00, %main_body ], [ %225, %ENDIF ]
  %temp152.0 = phi float [ 0.000000e+00, %main_body ], [ %193, %ENDIF ]
  %temp148.0 = phi float [ 0.000000e+00, %main_body ], [ %192, %ENDIF ]
  %temp144.0 = phi float [ 0.000000e+00, %main_body ], [ %191, %ENDIF ]
  %temp284.0 = phi float [ 0.000000e+00, %main_body ], [ %226, %ENDIF ]
  %temp140.0 = phi float [ 0.000000e+00, %main_body ], [ %190, %ENDIF ]
  %temp136.0 = phi float [ 0.000000e+00, %main_body ], [ %189, %ENDIF ]
  %temp132.0 = phi float [ 0.000000e+00, %main_body ], [ %188, %ENDIF ]
  %temp288.0 = phi float [ 0.000000e+00, %main_body ], [ %227, %ENDIF ]
  %temp128.0 = phi float [ 0.000000e+00, %main_body ], [ %187, %ENDIF ]
  %temp124.0 = phi float [ 0.000000e+00, %main_body ], [ %186, %ENDIF ]
  %temp120.0 = phi float [ 0.000000e+00, %main_body ], [ %185, %ENDIF ]
  %temp292.0 = phi float [ 0.000000e+00, %main_body ], [ %228, %ENDIF ]
  %temp116.0 = phi float [ 0.000000e+00, %main_body ], [ %184, %ENDIF ]
  %temp112.0 = phi float [ 0.000000e+00, %main_body ], [ %183, %ENDIF ]
  %temp108.0 = phi float [ 0.000000e+00, %main_body ], [ %182, %ENDIF ]
  %temp296.0 = phi float [ 0.000000e+00, %main_body ], [ %229, %ENDIF ]
  %temp104.0 = phi float [ 0.000000e+00, %main_body ], [ %181, %ENDIF ]
  %temp100.0 = phi float [ 0.000000e+00, %main_body ], [ %180, %ENDIF ]
  %temp96.0 = phi float [ 0.000000e+00, %main_body ], [ %179, %ENDIF ]
  %temp300.0 = phi float [ 0.000000e+00, %main_body ], [ %230, %ENDIF ]
  %temp92.0 = phi float [ 0.000000e+00, %main_body ], [ %178, %ENDIF ]
  %temp88.0 = phi float [ 0.000000e+00, %main_body ], [ %177, %ENDIF ]
  %temp84.0 = phi float [ 0.000000e+00, %main_body ], [ %176, %ENDIF ]
  %temp304.0 = phi float [ 0.000000e+00, %main_body ], [ %231, %ENDIF ]
  %temp80.0 = phi float [ 0.000000e+00, %main_body ], [ %175, %ENDIF ]
  %temp76.0 = phi float [ 0.000000e+00, %main_body ], [ %174, %ENDIF ]
  %temp72.0 = phi float [ 0.000000e+00, %main_body ], [ %173, %ENDIF ]
  %temp308.0 = phi float [ 0.000000e+00, %main_body ], [ %232, %ENDIF ]
  %temp68.0 = phi float [ 0.000000e+00, %main_body ], [ %172, %ENDIF ]
  %temp64.0 = phi float [ 0.000000e+00, %main_body ], [ %171, %ENDIF ]
  %temp60.0 = phi float [ 0.000000e+00, %main_body ], [ %170, %ENDIF ]
  %temp312.0 = phi float [ 0.000000e+00, %main_body ], [ %233, %ENDIF ]
  %temp56.0 = phi float [ 0.000000e+00, %main_body ], [ %169, %ENDIF ]
  %temp52.0 = phi float [ 0.000000e+00, %main_body ], [ %168, %ENDIF ]
  %temp48.0 = phi float [ 0.000000e+00, %main_body ], [ %167, %ENDIF ]
  %temp316.0 = phi float [ 0.000000e+00, %main_body ], [ %234, %ENDIF ]
  %temp44.0 = phi float [ 0.000000e+00, %main_body ], [ %166, %ENDIF ]
  %temp40.0 = phi float [ 0.000000e+00, %main_body ], [ %165, %ENDIF ]
  %temp36.0 = phi float [ 0.000000e+00, %main_body ], [ %164, %ENDIF ]
  %temp320.0 = phi float [ 0.000000e+00, %main_body ], [ %235, %ENDIF ]
  %temp32.0 = phi float [ 0.000000e+00, %main_body ], [ %163, %ENDIF ]
  %temp28.0 = phi float [ 0.000000e+00, %main_body ], [ %162, %ENDIF ]
  %temp24.0 = phi float [ 0.000000e+00, %main_body ], [ %161, %ENDIF ]
  %temp324.0 = phi float [ 0.000000e+00, %main_body ], [ %236, %ENDIF ]
  %temp20.0 = phi float [ 0.000000e+00, %main_body ], [ %160, %ENDIF ]
  %temp16.0 = phi float [ 0.000000e+00, %main_body ], [ %159, %ENDIF ]
  %temp12.0 = phi float [ 0.000000e+00, %main_body ], [ %158, %ENDIF ]
  %temp328.0 = phi float [ 0.000000e+00, %main_body ], [ %237, %ENDIF ]
  %temp332.0 = phi float [ 0.000000e+00, %main_body ], [ %238, %ENDIF ]
  %temp336.0 = phi float [ 0.000000e+00, %main_body ], [ %239, %ENDIF ]
  %temp4.0 = phi float [ 0.000000e+00, %main_body ], [ %286, %ENDIF ]
  %temp340.0 = phi float [ 0.000000e+00, %main_body ], [ %240, %ENDIF ]
  %temp344.0 = phi float [ 0.000000e+00, %main_body ], [ %241, %ENDIF ]
  %temp348.0 = phi float [ 0.000000e+00, %main_body ], [ %242, %ENDIF ]
  %temp352.0 = phi float [ 0.000000e+00, %main_body ], [ %243, %ENDIF ]
  %temp356.0 = phi float [ 0.000000e+00, %main_body ], [ %244, %ENDIF ]
  %temp360.0 = phi float [ 0.000000e+00, %main_body ], [ %245, %ENDIF ]
  %temp364.0 = phi float [ 0.000000e+00, %main_body ], [ %246, %ENDIF ]
  %temp368.0 = phi float [ 0.000000e+00, %main_body ], [ %247, %ENDIF ]
  %temp372.0 = phi float [ 0.000000e+00, %main_body ], [ %248, %ENDIF ]
  %temp376.0 = phi float [ 0.000000e+00, %main_body ], [ %249, %ENDIF ]
  %temp380.0 = phi float [ 0.000000e+00, %main_body ], [ %250, %ENDIF ]
  %temp384.0 = phi float [ 0.000000e+00, %main_body ], [ %251, %ENDIF ]
  %temp388.0 = phi float [ 0.000000e+00, %main_body ], [ %252, %ENDIF ]
  %temp392.0 = phi float [ 0.000000e+00, %main_body ], [ %253, %ENDIF ]
  %temp396.0 = phi float [ 0.000000e+00, %main_body ], [ %254, %ENDIF ]
  %temp400.0 = phi float [ 0.000000e+00, %main_body ], [ %255, %ENDIF ]
  %temp404.0 = phi float [ 0.000000e+00, %main_body ], [ %256, %ENDIF ]
  %temp408.0 = phi float [ 0.000000e+00, %main_body ], [ %257, %ENDIF ]
  %temp412.0 = phi float [ 0.000000e+00, %main_body ], [ %258, %ENDIF ]
  %temp416.0 = phi float [ 0.000000e+00, %main_body ], [ %259, %ENDIF ]
  %temp420.0 = phi float [ 0.000000e+00, %main_body ], [ %260, %ENDIF ]
  %temp424.0 = phi float [ 0.000000e+00, %main_body ], [ %261, %ENDIF ]
  %temp428.0 = phi float [ 0.000000e+00, %main_body ], [ %262, %ENDIF ]
  %temp432.0 = phi float [ 0.000000e+00, %main_body ], [ %263, %ENDIF ]
  %temp436.0 = phi float [ 0.000000e+00, %main_body ], [ %264, %ENDIF ]
  %temp440.0 = phi float [ 0.000000e+00, %main_body ], [ %265, %ENDIF ]
  %temp444.0 = phi float [ 0.000000e+00, %main_body ], [ %266, %ENDIF ]
  %temp448.0 = phi float [ 0.000000e+00, %main_body ], [ %267, %ENDIF ]
  %temp452.0 = phi float [ 0.000000e+00, %main_body ], [ %268, %ENDIF ]
  %temp456.0 = phi float [ 0.000000e+00, %main_body ], [ %269, %ENDIF ]
  %temp460.0 = phi float [ 0.000000e+00, %main_body ], [ %270, %ENDIF ]
  %temp464.0 = phi float [ 0.000000e+00, %main_body ], [ %271, %ENDIF ]
  %temp468.0 = phi float [ 0.000000e+00, %main_body ], [ %272, %ENDIF ]
  %temp472.0 = phi float [ 0.000000e+00, %main_body ], [ %273, %ENDIF ]
  %temp476.0 = phi float [ 0.000000e+00, %main_body ], [ %274, %ENDIF ]
  %temp480.0 = phi float [ 0.000000e+00, %main_body ], [ %275, %ENDIF ]
  %temp484.0 = phi float [ 0.000000e+00, %main_body ], [ %276, %ENDIF ]
  %temp488.0 = phi float [ 0.000000e+00, %main_body ], [ %277, %ENDIF ]
  %temp492.0 = phi float [ 0.000000e+00, %main_body ], [ %278, %ENDIF ]
  %temp496.0 = phi float [ 0.000000e+00, %main_body ], [ %279, %ENDIF ]
  %temp500.0 = phi float [ 0.000000e+00, %main_body ], [ %280, %ENDIF ]
  %temp504.0 = phi float [ 0.000000e+00, %main_body ], [ %281, %ENDIF ]
  %temp508.0 = phi float [ 0.000000e+00, %main_body ], [ %282, %ENDIF ]
  %temp512.0 = phi float [ 0.000000e+00, %main_body ], [ %283, %ENDIF ]
  %25 = bitcast float %temp4.0 to i32
  %26 = icmp sgt i32 %25, 125
  br i1 %26, label %IF, label %ENDIF

IF:                                               ; preds = %LOOP
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %13, float %temp12.0, float %temp16.0, float %temp20.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %temp24.0, float %temp28.0, float %temp32.0, float %temp36.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp40.0, float %temp44.0, float %temp48.0, float %temp52.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %temp56.0, float %temp60.0, float %temp64.0, float %temp68.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %temp72.0, float %temp76.0, float %temp80.0, float %temp84.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %temp88.0, float %temp92.0, float %temp96.0, float %temp100.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %temp104.0, float %temp108.0, float %temp112.0, float %temp116.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %temp120.0, float %temp124.0, float %temp128.0, float %temp132.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 40, i32 0, float %temp136.0, float %temp140.0, float %temp144.0, float %temp148.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 41, i32 0, float %temp152.0, float %temp156.0, float %temp160.0, float %temp164.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 42, i32 0, float %temp168.0, float %temp172.0, float %temp176.0, float %temp180.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 43, i32 0, float %temp184.0, float %temp188.0, float %temp192.0, float %temp196.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 44, i32 0, float %temp200.0, float %temp204.0, float %temp208.0, float %temp212.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 45, i32 0, float %temp216.0, float %temp220.0, float %temp224.0, float %temp228.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 46, i32 0, float %temp232.0, float %temp236.0, float %temp240.0, float %temp244.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 47, i32 0, float %temp248.0, float %temp252.0, float %temp256.0, float %temp260.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 48, i32 0, float %temp264.0, float %temp268.0, float %temp272.0, float %temp276.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 49, i32 0, float %temp280.0, float %temp284.0, float %temp288.0, float %temp292.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 50, i32 0, float %temp296.0, float %temp300.0, float %temp304.0, float %temp308.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 51, i32 0, float %temp312.0, float %temp316.0, float %temp320.0, float %temp324.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 52, i32 0, float %temp328.0, float %temp332.0, float %temp336.0, float %temp340.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 53, i32 0, float %temp344.0, float %temp348.0, float %temp352.0, float %temp356.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 54, i32 0, float %temp360.0, float %temp364.0, float %temp368.0, float %temp372.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 55, i32 0, float %temp376.0, float %temp380.0, float %temp384.0, float %temp388.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 56, i32 0, float %temp392.0, float %temp396.0, float %temp400.0, float %temp404.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 57, i32 0, float %temp408.0, float %temp412.0, float %temp416.0, float %temp420.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 58, i32 0, float %temp424.0, float %temp428.0, float %temp432.0, float %temp436.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 59, i32 0, float %temp440.0, float %temp444.0, float %temp448.0, float %temp452.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 60, i32 0, float %temp456.0, float %temp460.0, float %temp464.0, float %temp468.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 61, i32 0, float %temp472.0, float %temp476.0, float %temp480.0, float %temp484.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 62, i32 0, float %temp488.0, float %temp492.0, float %temp496.0, float %temp500.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 63, i32 0, float %temp504.0, float %temp508.0, float %temp512.0, float %14)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %20, float %21, float %22, float %23)
  ret void

ENDIF:                                            ; preds = %LOOP
  %27 = bitcast float %temp4.0 to i32
  %28 = bitcast float %temp4.0 to i32
  %29 = add i32 %24, %28
  %30 = bitcast i32 %29 to float
  %31 = insertelement <126 x float> undef, float %temp12.0, i32 0
  %32 = insertelement <126 x float> %31, float %temp16.0, i32 1
  %33 = insertelement <126 x float> %32, float %temp20.0, i32 2
  %34 = insertelement <126 x float> %33, float %temp24.0, i32 3
  %35 = insertelement <126 x float> %34, float %temp28.0, i32 4
  %36 = insertelement <126 x float> %35, float %temp32.0, i32 5
  %37 = insertelement <126 x float> %36, float %temp36.0, i32 6
  %38 = insertelement <126 x float> %37, float %temp40.0, i32 7
  %39 = insertelement <126 x float> %38, float %temp44.0, i32 8
  %40 = insertelement <126 x float> %39, float %temp48.0, i32 9
  %41 = insertelement <126 x float> %40, float %temp52.0, i32 10
  %42 = insertelement <126 x float> %41, float %temp56.0, i32 11
  %43 = insertelement <126 x float> %42, float %temp60.0, i32 12
  %44 = insertelement <126 x float> %43, float %temp64.0, i32 13
  %45 = insertelement <126 x float> %44, float %temp68.0, i32 14
  %46 = insertelement <126 x float> %45, float %temp72.0, i32 15
  %47 = insertelement <126 x float> %46, float %temp76.0, i32 16
  %48 = insertelement <126 x float> %47, float %temp80.0, i32 17
  %49 = insertelement <126 x float> %48, float %temp84.0, i32 18
  %50 = insertelement <126 x float> %49, float %temp88.0, i32 19
  %51 = insertelement <126 x float> %50, float %temp92.0, i32 20
  %52 = insertelement <126 x float> %51, float %temp96.0, i32 21
  %53 = insertelement <126 x float> %52, float %temp100.0, i32 22
  %54 = insertelement <126 x float> %53, float %temp104.0, i32 23
  %55 = insertelement <126 x float> %54, float %temp108.0, i32 24
  %56 = insertelement <126 x float> %55, float %temp112.0, i32 25
  %57 = insertelement <126 x float> %56, float %temp116.0, i32 26
  %58 = insertelement <126 x float> %57, float %temp120.0, i32 27
  %59 = insertelement <126 x float> %58, float %temp124.0, i32 28
  %60 = insertelement <126 x float> %59, float %temp128.0, i32 29
  %61 = insertelement <126 x float> %60, float %temp132.0, i32 30
  %62 = insertelement <126 x float> %61, float %temp136.0, i32 31
  %63 = insertelement <126 x float> %62, float %temp140.0, i32 32
  %64 = insertelement <126 x float> %63, float %temp144.0, i32 33
  %65 = insertelement <126 x float> %64, float %temp148.0, i32 34
  %66 = insertelement <126 x float> %65, float %temp152.0, i32 35
  %67 = insertelement <126 x float> %66, float %temp156.0, i32 36
  %68 = insertelement <126 x float> %67, float %temp160.0, i32 37
  %69 = insertelement <126 x float> %68, float %temp164.0, i32 38
  %70 = insertelement <126 x float> %69, float %temp168.0, i32 39
  %71 = insertelement <126 x float> %70, float %temp172.0, i32 40
  %72 = insertelement <126 x float> %71, float %temp176.0, i32 41
  %73 = insertelement <126 x float> %72, float %temp180.0, i32 42
  %74 = insertelement <126 x float> %73, float %temp184.0, i32 43
  %75 = insertelement <126 x float> %74, float %temp188.0, i32 44
  %76 = insertelement <126 x float> %75, float %temp192.0, i32 45
  %77 = insertelement <126 x float> %76, float %temp196.0, i32 46
  %78 = insertelement <126 x float> %77, float %temp200.0, i32 47
  %79 = insertelement <126 x float> %78, float %temp204.0, i32 48
  %80 = insertelement <126 x float> %79, float %temp208.0, i32 49
  %81 = insertelement <126 x float> %80, float %temp212.0, i32 50
  %82 = insertelement <126 x float> %81, float %temp216.0, i32 51
  %83 = insertelement <126 x float> %82, float %temp220.0, i32 52
  %84 = insertelement <126 x float> %83, float %temp224.0, i32 53
  %85 = insertelement <126 x float> %84, float %temp228.0, i32 54
  %86 = insertelement <126 x float> %85, float %temp232.0, i32 55
  %87 = insertelement <126 x float> %86, float %temp236.0, i32 56
  %88 = insertelement <126 x float> %87, float %temp240.0, i32 57
  %89 = insertelement <126 x float> %88, float %temp244.0, i32 58
  %90 = insertelement <126 x float> %89, float %temp248.0, i32 59
  %91 = insertelement <126 x float> %90, float %temp252.0, i32 60
  %92 = insertelement <126 x float> %91, float %temp256.0, i32 61
  %93 = insertelement <126 x float> %92, float %temp260.0, i32 62
  %94 = insertelement <126 x float> %93, float %temp264.0, i32 63
  %95 = insertelement <126 x float> %94, float %temp268.0, i32 64
  %96 = insertelement <126 x float> %95, float %temp272.0, i32 65
  %97 = insertelement <126 x float> %96, float %temp276.0, i32 66
  %98 = insertelement <126 x float> %97, float %temp280.0, i32 67
  %99 = insertelement <126 x float> %98, float %temp284.0, i32 68
  %100 = insertelement <126 x float> %99, float %temp288.0, i32 69
  %101 = insertelement <126 x float> %100, float %temp292.0, i32 70
  %102 = insertelement <126 x float> %101, float %temp296.0, i32 71
  %103 = insertelement <126 x float> %102, float %temp300.0, i32 72
  %104 = insertelement <126 x float> %103, float %temp304.0, i32 73
  %105 = insertelement <126 x float> %104, float %temp308.0, i32 74
  %106 = insertelement <126 x float> %105, float %temp312.0, i32 75
  %107 = insertelement <126 x float> %106, float %temp316.0, i32 76
  %108 = insertelement <126 x float> %107, float %temp320.0, i32 77
  %109 = insertelement <126 x float> %108, float %temp324.0, i32 78
  %110 = insertelement <126 x float> %109, float %temp328.0, i32 79
  %111 = insertelement <126 x float> %110, float %temp332.0, i32 80
  %112 = insertelement <126 x float> %111, float %temp336.0, i32 81
  %113 = insertelement <126 x float> %112, float %temp340.0, i32 82
  %114 = insertelement <126 x float> %113, float %temp344.0, i32 83
  %115 = insertelement <126 x float> %114, float %temp348.0, i32 84
  %116 = insertelement <126 x float> %115, float %temp352.0, i32 85
  %117 = insertelement <126 x float> %116, float %temp356.0, i32 86
  %118 = insertelement <126 x float> %117, float %temp360.0, i32 87
  %119 = insertelement <126 x float> %118, float %temp364.0, i32 88
  %120 = insertelement <126 x float> %119, float %temp368.0, i32 89
  %121 = insertelement <126 x float> %120, float %temp372.0, i32 90
  %122 = insertelement <126 x float> %121, float %temp376.0, i32 91
  %123 = insertelement <126 x float> %122, float %temp380.0, i32 92
  %124 = insertelement <126 x float> %123, float %temp384.0, i32 93
  %125 = insertelement <126 x float> %124, float %temp388.0, i32 94
  %126 = insertelement <126 x float> %125, float %temp392.0, i32 95
  %127 = insertelement <126 x float> %126, float %temp396.0, i32 96
  %128 = insertelement <126 x float> %127, float %temp400.0, i32 97
  %129 = insertelement <126 x float> %128, float %temp404.0, i32 98
  %130 = insertelement <126 x float> %129, float %temp408.0, i32 99
  %131 = insertelement <126 x float> %130, float %temp412.0, i32 100
  %132 = insertelement <126 x float> %131, float %temp416.0, i32 101
  %133 = insertelement <126 x float> %132, float %temp420.0, i32 102
  %134 = insertelement <126 x float> %133, float %temp424.0, i32 103
  %135 = insertelement <126 x float> %134, float %temp428.0, i32 104
  %136 = insertelement <126 x float> %135, float %temp432.0, i32 105
  %137 = insertelement <126 x float> %136, float %temp436.0, i32 106
  %138 = insertelement <126 x float> %137, float %temp440.0, i32 107
  %139 = insertelement <126 x float> %138, float %temp444.0, i32 108
  %140 = insertelement <126 x float> %139, float %temp448.0, i32 109
  %141 = insertelement <126 x float> %140, float %temp452.0, i32 110
  %142 = insertelement <126 x float> %141, float %temp456.0, i32 111
  %143 = insertelement <126 x float> %142, float %temp460.0, i32 112
  %144 = insertelement <126 x float> %143, float %temp464.0, i32 113
  %145 = insertelement <126 x float> %144, float %temp468.0, i32 114
  %146 = insertelement <126 x float> %145, float %temp472.0, i32 115
  %147 = insertelement <126 x float> %146, float %temp476.0, i32 116
  %148 = insertelement <126 x float> %147, float %temp480.0, i32 117
  %149 = insertelement <126 x float> %148, float %temp484.0, i32 118
  %150 = insertelement <126 x float> %149, float %temp488.0, i32 119
  %151 = insertelement <126 x float> %150, float %temp492.0, i32 120
  %152 = insertelement <126 x float> %151, float %temp496.0, i32 121
  %153 = insertelement <126 x float> %152, float %temp500.0, i32 122
  %154 = insertelement <126 x float> %153, float %temp504.0, i32 123
  %155 = insertelement <126 x float> %154, float %temp508.0, i32 124
  %156 = insertelement <126 x float> %155, float %temp512.0, i32 125
  %157 = insertelement <126 x float> %156, float %30, i32 %27
  %158 = extractelement <126 x float> %157, i32 0
  %159 = extractelement <126 x float> %157, i32 1
  %160 = extractelement <126 x float> %157, i32 2
  %161 = extractelement <126 x float> %157, i32 3
  %162 = extractelement <126 x float> %157, i32 4
  %163 = extractelement <126 x float> %157, i32 5
  %164 = extractelement <126 x float> %157, i32 6
  %165 = extractelement <126 x float> %157, i32 7
  %166 = extractelement <126 x float> %157, i32 8
  %167 = extractelement <126 x float> %157, i32 9
  %168 = extractelement <126 x float> %157, i32 10
  %169 = extractelement <126 x float> %157, i32 11
  %170 = extractelement <126 x float> %157, i32 12
  %171 = extractelement <126 x float> %157, i32 13
  %172 = extractelement <126 x float> %157, i32 14
  %173 = extractelement <126 x float> %157, i32 15
  %174 = extractelement <126 x float> %157, i32 16
  %175 = extractelement <126 x float> %157, i32 17
  %176 = extractelement <126 x float> %157, i32 18
  %177 = extractelement <126 x float> %157, i32 19
  %178 = extractelement <126 x float> %157, i32 20
  %179 = extractelement <126 x float> %157, i32 21
  %180 = extractelement <126 x float> %157, i32 22
  %181 = extractelement <126 x float> %157, i32 23
  %182 = extractelement <126 x float> %157, i32 24
  %183 = extractelement <126 x float> %157, i32 25
  %184 = extractelement <126 x float> %157, i32 26
  %185 = extractelement <126 x float> %157, i32 27
  %186 = extractelement <126 x float> %157, i32 28
  %187 = extractelement <126 x float> %157, i32 29
  %188 = extractelement <126 x float> %157, i32 30
  %189 = extractelement <126 x float> %157, i32 31
  %190 = extractelement <126 x float> %157, i32 32
  %191 = extractelement <126 x float> %157, i32 33
  %192 = extractelement <126 x float> %157, i32 34
  %193 = extractelement <126 x float> %157, i32 35
  %194 = extractelement <126 x float> %157, i32 36
  %195 = extractelement <126 x float> %157, i32 37
  %196 = extractelement <126 x float> %157, i32 38
  %197 = extractelement <126 x float> %157, i32 39
  %198 = extractelement <126 x float> %157, i32 40
  %199 = extractelement <126 x float> %157, i32 41
  %200 = extractelement <126 x float> %157, i32 42
  %201 = extractelement <126 x float> %157, i32 43
  %202 = extractelement <126 x float> %157, i32 44
  %203 = extractelement <126 x float> %157, i32 45
  %204 = extractelement <126 x float> %157, i32 46
  %205 = extractelement <126 x float> %157, i32 47
  %206 = extractelement <126 x float> %157, i32 48
  %207 = extractelement <126 x float> %157, i32 49
  %208 = extractelement <126 x float> %157, i32 50
  %209 = extractelement <126 x float> %157, i32 51
  %210 = extractelement <126 x float> %157, i32 52
  %211 = extractelement <126 x float> %157, i32 53
  %212 = extractelement <126 x float> %157, i32 54
  %213 = extractelement <126 x float> %157, i32 55
  %214 = extractelement <126 x float> %157, i32 56
  %215 = extractelement <126 x float> %157, i32 57
  %216 = extractelement <126 x float> %157, i32 58
  %217 = extractelement <126 x float> %157, i32 59
  %218 = extractelement <126 x float> %157, i32 60
  %219 = extractelement <126 x float> %157, i32 61
  %220 = extractelement <126 x float> %157, i32 62
  %221 = extractelement <126 x float> %157, i32 63
  %222 = extractelement <126 x float> %157, i32 64
  %223 = extractelement <126 x float> %157, i32 65
  %224 = extractelement <126 x float> %157, i32 66
  %225 = extractelement <126 x float> %157, i32 67
  %226 = extractelement <126 x float> %157, i32 68
  %227 = extractelement <126 x float> %157, i32 69
  %228 = extractelement <126 x float> %157, i32 70
  %229 = extractelement <126 x float> %157, i32 71
  %230 = extractelement <126 x float> %157, i32 72
  %231 = extractelement <126 x float> %157, i32 73
  %232 = extractelement <126 x float> %157, i32 74
  %233 = extractelement <126 x float> %157, i32 75
  %234 = extractelement <126 x float> %157, i32 76
  %235 = extractelement <126 x float> %157, i32 77
  %236 = extractelement <126 x float> %157, i32 78
  %237 = extractelement <126 x float> %157, i32 79
  %238 = extractelement <126 x float> %157, i32 80
  %239 = extractelement <126 x float> %157, i32 81
  %240 = extractelement <126 x float> %157, i32 82
  %241 = extractelement <126 x float> %157, i32 83
  %242 = extractelement <126 x float> %157, i32 84
  %243 = extractelement <126 x float> %157, i32 85
  %244 = extractelement <126 x float> %157, i32 86
  %245 = extractelement <126 x float> %157, i32 87
  %246 = extractelement <126 x float> %157, i32 88
  %247 = extractelement <126 x float> %157, i32 89
  %248 = extractelement <126 x float> %157, i32 90
  %249 = extractelement <126 x float> %157, i32 91
  %250 = extractelement <126 x float> %157, i32 92
  %251 = extractelement <126 x float> %157, i32 93
  %252 = extractelement <126 x float> %157, i32 94
  %253 = extractelement <126 x float> %157, i32 95
  %254 = extractelement <126 x float> %157, i32 96
  %255 = extractelement <126 x float> %157, i32 97
  %256 = extractelement <126 x float> %157, i32 98
  %257 = extractelement <126 x float> %157, i32 99
  %258 = extractelement <126 x float> %157, i32 100
  %259 = extractelement <126 x float> %157, i32 101
  %260 = extractelement <126 x float> %157, i32 102
  %261 = extractelement <126 x float> %157, i32 103
  %262 = extractelement <126 x float> %157, i32 104
  %263 = extractelement <126 x float> %157, i32 105
  %264 = extractelement <126 x float> %157, i32 106
  %265 = extractelement <126 x float> %157, i32 107
  %266 = extractelement <126 x float> %157, i32 108
  %267 = extractelement <126 x float> %157, i32 109
  %268 = extractelement <126 x float> %157, i32 110
  %269 = extractelement <126 x float> %157, i32 111
  %270 = extractelement <126 x float> %157, i32 112
  %271 = extractelement <126 x float> %157, i32 113
  %272 = extractelement <126 x float> %157, i32 114
  %273 = extractelement <126 x float> %157, i32 115
  %274 = extractelement <126 x float> %157, i32 116
  %275 = extractelement <126 x float> %157, i32 117
  %276 = extractelement <126 x float> %157, i32 118
  %277 = extractelement <126 x float> %157, i32 119
  %278 = extractelement <126 x float> %157, i32 120
  %279 = extractelement <126 x float> %157, i32 121
  %280 = extractelement <126 x float> %157, i32 122
  %281 = extractelement <126 x float> %157, i32 123
  %282 = extractelement <126 x float> %157, i32 124
  %283 = extractelement <126 x float> %157, i32 125
  %284 = bitcast float %temp4.0 to i32
  %285 = add i32 %284, 1
  %286 = bitcast i32 %285 to float
  br label %LOOP
}

; External declarations, attribute groups and metadata used by this module.

; Declared only (no body in this module): takes a <16 x i8> value and an i32,
; returns a float. Carries attribute group #1.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Declared only: takes a <16 x i8> value and two i32 operands, returns a
; <4 x float>. Carries attribute group #1.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Declared only: five i32 operands followed by four float operands, no return
; value. No attribute group is attached, so unlike the two declarations above
; it is not marked nounwind/readnone.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

; Attribute group #0: two string attributes. Its user is not on the lines
; shown here -- presumably the shader function defined above; TODO confirm.
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
; Attribute group #1: attached to @llvm.SI.load.const and
; @llvm.SI.vs.load.input above.
attributes #1 = { nounwind readnone }

; Metadata node !0: a string "const", a null operand and the constant i32 1.
; NOTE(review): the instructions that reference !0 are not on the lines shown
; here; confirm its intended meaning against its uses.
!0 = !{!"const", null, i32 1}