[llvm] r279625 - Create subranges for new intervals resulting from live interval splitting

Wed Aug 24 23:52:27 PDT 2016

Hi Krzysztof,

On 24/08/16 10:37 PM, Krzysztof Parzyszek via llvm-commits wrote:
> Author: kparzysz
> Date: Wed Aug 24 08:37:55 2016
> New Revision: 279625
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=279625&view=rev
> Log:
> Create subranges for new intervals resulting from live interval splitting

This change broke several piglit tests using the Mesa radeonsi driver.
I'm attaching two examples of failing LLVM IR; below is the
corresponding output from llc:

llc: ../lib/CodeGen/RegisterCoalescer.cpp:2021: {anonymous}::JoinVals::ConflictResolution {anonymous}::JoinVals::analyzeValue(unsigned int, {anonymous}::JoinVals&): Assertion `DefMI != nullptr' failed.
#0 0x00007f147fdf9d78 llvm::sys::PrintStackTrace(llvm::raw_ostream&) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/Support/Unix/Signals.inc:404:0
#1 0x00007f147fdf7bbe llvm::sys::RunSignalHandlers() /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/Support/Signals.cpp:45:0
#2 0x00007f147fdf7d09 SignalHandler(int) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/Support/Unix/Signals.inc:258:0
#3 0x00007f147f6daed0 __restore_rt (/lib/x86_64-linux-gnu/libpthread.so.0+0x10ed0)
#4 0x00007f147eaad1c8 gsignal (/lib/x86_64-linux-gnu/libc.so.6+0x331c8)
#5 0x00007f147eaae64a abort (/lib/x86_64-linux-gnu/libc.so.6+0x3464a)
#6 0x00007f147eaa6107 (/lib/x86_64-linux-gnu/libc.so.6+0x2c107)
#7 0x00007f147eaa61b2 (/lib/x86_64-linux-gnu/libc.so.6+0x2c1b2)
#8 0x00007f14801584bf analyzeValue /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:2112:0
#9 0x00007f14801584bf (anonymous namespace)::JoinVals::computeAssignment(unsigned int, (anonymous namespace)::JoinVals&) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:2233:0
#10 0x00007f14801585b6 begin /home/daenzer/src/llvm-git/llvm/build-amd64/../include/llvm/ADT/SmallVector.h:115:0
#11 0x00007f14801585b6 size /home/daenzer/src/llvm-git/llvm/build-amd64/../include/llvm/ADT/SmallVector.h:132:0
#12 0x00007f14801585b6 operator[] /home/daenzer/src/llvm-git/llvm/build-amd64/../include/llvm/ADT/SmallVector.h:145:0
#13 0x00007f14801585b6 (anonymous namespace)::JoinVals::mapValues((anonymous namespace)::JoinVals&) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:2269:0
#14 0x00007f148015b8e1 joinSubRegRanges /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:2657:0
#15 0x00007f148015b8e1 (anonymous namespace)::RegisterCoalescer::mergeSubRangeInto(llvm::LiveInterval&, llvm::LiveRange const&, unsigned int, llvm::CoalescerPair&) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:2728:0
#16 0x00007f148015f9d9 llvm::LiveInterval::SingleLinkedListIterator<llvm::LiveInterval::SubRange>::operator++() /home/daenzer/src/llvm-git/llvm/build-amd64/../include/llvm/CodeGen/LiveInterval.h:673:0
#17 0x00007f148015f9d9 joinVirtRegs /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:2792:0
#18 0x00007f148015f9d9 joinIntervals /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:2844:0
#19 0x00007f148015f9d9 (anonymous namespace)::RegisterCoalescer::joinCopy(llvm::MachineInstr*, bool&) [clone .constprop.292] /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:1468:0
#20 0x00007f1480161d8f (anonymous namespace)::RegisterCoalescer::copyCoalesceWorkList(llvm::MutableArrayRef<llvm::MachineInstr*>) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:2916:0
#21 0x00007f1480161e0e llvm::SmallVectorTemplateCommon<llvm::MachineInstr*, void>::begin() const /home/daenzer/src/llvm-git/llvm/build-amd64/../include/llvm/ADT/SmallVector.h:115:0
#22 0x00007f1480161e0e llvm::SmallVectorTemplateCommon<llvm::MachineInstr*, void>::size() const /home/daenzer/src/llvm-git/llvm/build-amd64/../include/llvm/ADT/SmallVector.h:132:0
#23 0x00007f1480161e0e (anonymous namespace)::RegisterCoalescer::coalesceLocals() /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:3040:0
#24 0x00007f1480163014 joinAllIntervals /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:3070:0
#25 0x00007f1480163014 (anonymous namespace)::RegisterCoalescer::runOnMachineFunction(llvm::MachineFunction&) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:3115:0
#26 0x00007f1480060b24 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/MachineFunctionPass.cpp:62:0
#27 0x00007f147fed1c49 llvm::FPPassManager::runOnFunction(llvm::Function&) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/IR/LegacyPassManager.cpp:1522:0
#28 0x00007f147fed1cec llvm::FPPassManager::runOnModule(llvm::Module&) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/IR/LegacyPassManager.cpp:1543:0
#29 0x00007f147fed234c runOnModule /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/IR/LegacyPassManager.cpp:1599:0
#30 0x00007f147fed234c llvm::legacy::PassManagerImpl::run(llvm::Module&) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/IR/LegacyPassManager.cpp:1702:0
#31 0x0000000000416815 compileModule(char**, llvm::LLVMContext&) /home/daenzer/src/llvm-git/llvm/build-amd64/../tools/llc/llc.cpp:508:0
#32 0x000000000040a968 main /home/daenzer/src/llvm-git/llvm/build-amd64/../tools/llc/llc.cpp:273:0
#33 0x00007f147ea9a730 __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x20730)
#34 0x000000000040a9d9 _start (/home/daenzer/src/llvm-git/llvm/build-amd64/bin/llc+0x40a9d9)
Stack dump:
0.      Program arguments: /home/daenzer/src/llvm-git/llvm/build-amd64/bin/llc -march=amdgcn -mcpu=kaveri
1.      Running pass 'Function Pass Manager' on module '<stdin>'.
2.      Running pass 'Simple Register Coalescing' on function '@main'

** Couldn't join subrange!

UNREACHABLE executed at ../lib/CodeGen/RegisterCoalescer.cpp:2666!
#0 0x00007f7702839d78 llvm::sys::PrintStackTrace(llvm::raw_ostream&) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/Support/Unix/Signals.inc:404:0
#1 0x00007f7702837bbe llvm::sys::RunSignalHandlers() /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/Support/Signals.cpp:45:0
#2 0x00007f7702837d09 SignalHandler(int) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/Support/Unix/Signals.inc:258:0
#3 0x00007f770211aed0 __restore_rt (/lib/x86_64-linux-gnu/libpthread.so.0+0x10ed0)
#4 0x00007f77014ed1c8 gsignal (/lib/x86_64-linux-gnu/libc.so.6+0x331c8)
#5 0x00007f77014ee64a abort (/lib/x86_64-linux-gnu/libc.so.6+0x3464a)
#6 0x00007f77027d7bc6 (/home/daenzer/src/llvm-git/llvm/build-amd64/bin/../lib/libLLVM-4.0svn.so+0x4adbc6)
#7 0x00007f7702b9bcde joinSubRegRanges /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:2666:0
#8 0x00007f7702b9bcde (anonymous namespace)::RegisterCoalescer::mergeSubRangeInto(llvm::LiveInterval&, llvm::LiveRange const&, unsigned int, llvm::CoalescerPair&) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:2728:0
#9 0x00007f7702b9f9d9 llvm::LiveInterval::SingleLinkedListIterator<llvm::LiveInterval::SubRange>::operator++() /home/daenzer/src/llvm-git/llvm/build-amd64/../include/llvm/CodeGen/LiveInterval.h:673:0
#10 0x00007f7702b9f9d9 joinVirtRegs /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:2792:0
#11 0x00007f7702b9f9d9 joinIntervals /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:2844:0
#12 0x00007f7702b9f9d9 (anonymous namespace)::RegisterCoalescer::joinCopy(llvm::MachineInstr*, bool&) [clone .constprop.292] /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:1468:0
#13 0x00007f7702ba1d8f (anonymous namespace)::RegisterCoalescer::copyCoalesceWorkList(llvm::MutableArrayRef<llvm::MachineInstr*>) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:2916:0
#14 0x00007f7702ba3031 joinAllIntervals /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:3074:0
#15 0x00007f7702ba3031 (anonymous namespace)::RegisterCoalescer::runOnMachineFunction(llvm::MachineFunction&) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/RegisterCoalescer.cpp:3115:0
#16 0x00007f7702aa0b24 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/CodeGen/MachineFunctionPass.cpp:62:0
#17 0x00007f7702911c49 llvm::FPPassManager::runOnFunction(llvm::Function&) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/IR/LegacyPassManager.cpp:1522:0
#18 0x00007f7702911cec llvm::FPPassManager::runOnModule(llvm::Module&) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/IR/LegacyPassManager.cpp:1543:0
#19 0x00007f770291234c runOnModule /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/IR/LegacyPassManager.cpp:1599:0
#20 0x00007f770291234c llvm::legacy::PassManagerImpl::run(llvm::Module&) /home/daenzer/src/llvm-git/llvm/build-amd64/../lib/IR/LegacyPassManager.cpp:1702:0
#21 0x0000000000416815 compileModule(char**, llvm::LLVMContext&) /home/daenzer/src/llvm-git/llvm/build-amd64/../tools/llc/llc.cpp:508:0
#22 0x000000000040a968 main /home/daenzer/src/llvm-git/llvm/build-amd64/../tools/llc/llc.cpp:273:0
#23 0x00007f77014da730 __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x20730)
#24 0x000000000040a9d9 _start (/home/daenzer/src/llvm-git/llvm/build-amd64/bin/llc+0x40a9d9)
Stack dump:
0.      Program arguments: /home/daenzer/src/llvm-git/llvm/build-amd64/bin/llc -march=amdgcn -mcpu=kaveri
1.      Running pass 'Function Pass Manager' on module '<stdin>'.
2.      Running pass 'Simple Register Coalescing' on function '@main'

-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
-------------- next part --------------
; Module header of the first attached reproducer.
; ModuleID = 'tgsi'
source_filename = "tgsi"
; The amdgcn architecture named in the triple matches the -march=amdgcn flag
; shown in the "Program arguments" lines of the llc stack dumps.
target triple = "amdgcn--"

; Reproducer #1: a function using the amdgpu_gs calling convention.
; The llc runs quoted in the message used "-march=amdgcn -mcpu=kaveri" with the
; module on stdin and failed in the 'Simple Register Coalescing' pass on
; '@main'.
; NOTE(review): the message does not state which of the two llc failures
; belongs to which attachment; presumably they follow attachment order --
; confirm before relying on it.
;
; Shape of the body: constants and two <16 x i8> values are loaded once, then
; the same sequence is repeated three times, driven by %7, %8 and %10 in turn:
; rebuild four <4 x float> vectors, issue four buffer loads, call
; llvm.AMDGPU.kill, store the four loaded values, perform four variable-index
; extractelement/store pairs, and call llvm.SI.sendmsg(34, %6).
define amdgpu_gs void @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
main_body:
  ; Load element 0 of %1 and read one constant from it. %17 is later bitcast to
  ; i32 and used as the (non-constant) extractelement index.
  %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !invariant.load !0
  %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 0)
  ; Load element 1 of %1 and read twelve constants (%20-%31) from it, with
  ; second operands 16 through 60 in steps of 4.
  %18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1, !amdgpu.uniform !0
  %19 = load <16 x i8>, <16 x i8> addrspace(2)* %18, align 16, !invariant.load !0
  %20 = call float @llvm.SI.load.const(<16 x i8> %19, i32 16)
  %21 = call float @llvm.SI.load.const(<16 x i8> %19, i32 20)
  %22 = call float @llvm.SI.load.const(<16 x i8> %19, i32 24)
  %23 = call float @llvm.SI.load.const(<16 x i8> %19, i32 28)
  %24 = call float @llvm.SI.load.const(<16 x i8> %19, i32 32)
  %25 = call float @llvm.SI.load.const(<16 x i8> %19, i32 36)
  %26 = call float @llvm.SI.load.const(<16 x i8> %19, i32 40)
  %27 = call float @llvm.SI.load.const(<16 x i8> %19, i32 44)
  %28 = call float @llvm.SI.load.const(<16 x i8> %19, i32 48)
  %29 = call float @llvm.SI.load.const(<16 x i8> %19, i32 52)
  %30 = call float @llvm.SI.load.const(<16 x i8> %19, i32 56)
  %31 = call float @llvm.SI.load.const(<16 x i8> %19, i32 60)
  ; Load elements 3 and 4 of %0. %33 is the first operand of every buffer load
  ; below and %35 the first operand of every tbuffer store.
  %32 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i64 3, !amdgpu.uniform !0
  %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !invariant.load !0
  %34 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i64 4, !amdgpu.uniform !0
  %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !invariant.load !0
  ; ---- Repetition 1 (driven by %7) ----
  ; Build four <4 x float> vectors: lane 0 is a literal (1.0 or 0.0), lanes 1-3
  ; are filled from the constants loaded above.
  %36 = bitcast float %17 to i32
  %array_vector1 = insertelement <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, float %20, i32 1
  %array_vector2 = insertelement <4 x float> %array_vector1, float %24, i32 2
  %array_vector3 = insertelement <4 x float> %array_vector2, float %28, i32 3
  %array_vector5 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %21, i32 1
  %array_vector6 = insertelement <4 x float> %array_vector5, float %25, i32 2
  %array_vector7 = insertelement <4 x float> %array_vector6, float %29, i32 3
  %array_vector9 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %22, i32 1
  %array_vector10 = insertelement <4 x float> %array_vector9, float %26, i32 2
  %array_vector11 = insertelement <4 x float> %array_vector10, float %30, i32 3
  %array_vector13 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %23, i32 1
  %array_vector14 = insertelement <4 x float> %array_vector13, float %27, i32 2
  %array_vector15 = insertelement <4 x float> %array_vector14, float %31, i32 3
  ; Four buffer loads, each using %7 << 2 as second operand and 4096, 4352,
  ; 4608, 4864 as third operand.
  %37 = shl i32 %7, 2
  %38 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %33, i32 %37, i32 4096, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
  %39 = shl i32 %7, 2
  %40 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %33, i32 %39, i32 4352, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
  %41 = shl i32 %7, 2
  %42 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %33, i32 %41, i32 4608, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
  %43 = shl i32 %7, 2
  %44 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %33, i32 %43, i32 4864, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
  call void @llvm.AMDGPU.kill(float 1.000000e+00)
  ; Store the four loaded values.
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %38, i32 1, i32 0, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %40, i32 1, i32 12, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %42, i32 1, i32 24, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %44, i32 1, i32 36, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  ; Extract one lane from each vector using the non-constant index %36 and
  ; store it.
  %bc = bitcast <4 x float> %array_vector3 to <4 x i32>
  %45 = extractelement <4 x i32> %bc, i32 %36
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %45, i32 1, i32 48, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  %bc48 = bitcast <4 x float> %array_vector7 to <4 x i32>
  %46 = extractelement <4 x i32> %bc48, i32 %36
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %46, i32 1, i32 60, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  %bc49 = bitcast <4 x float> %array_vector11 to <4 x i32>
  %47 = extractelement <4 x i32> %bc49, i32 %36
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %47, i32 1, i32 72, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  %bc50 = bitcast <4 x float> %array_vector15 to <4 x i32>
  %48 = extractelement <4 x i32> %bc50, i32 %36
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %48, i32 1, i32 84, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  call void @llvm.SI.sendmsg(i32 34, i32 %6)
  ; ---- Repetition 2 (driven by %8) ----
  ; Same sequence as repetition 1; the vectors are rebuilt from the same
  ; constants and the index %49 is again the bitcast of %17.
  %49 = bitcast float %17 to i32
  %array_vector17 = insertelement <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, float %20, i32 1
  %array_vector18 = insertelement <4 x float> %array_vector17, float %24, i32 2
  %array_vector19 = insertelement <4 x float> %array_vector18, float %28, i32 3
  %array_vector21 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %21, i32 1
  %array_vector22 = insertelement <4 x float> %array_vector21, float %25, i32 2
  %array_vector23 = insertelement <4 x float> %array_vector22, float %29, i32 3
  %array_vector25 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %22, i32 1
  %array_vector26 = insertelement <4 x float> %array_vector25, float %26, i32 2
  %array_vector27 = insertelement <4 x float> %array_vector26, float %30, i32 3
  %array_vector29 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %23, i32 1
  %array_vector30 = insertelement <4 x float> %array_vector29, float %27, i32 2
  %array_vector31 = insertelement <4 x float> %array_vector30, float %31, i32 3
  %50 = shl i32 %8, 2
  %51 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %33, i32 %50, i32 4096, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
  %52 = shl i32 %8, 2
  %53 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %33, i32 %52, i32 4352, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
  %54 = shl i32 %8, 2
  %55 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %33, i32 %54, i32 4608, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
  %56 = shl i32 %8, 2
  %57 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %33, i32 %56, i32 4864, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
  call void @llvm.AMDGPU.kill(float 1.000000e+00)
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %51, i32 1, i32 4, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %53, i32 1, i32 16, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %55, i32 1, i32 28, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %57, i32 1, i32 40, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  %bc51 = bitcast <4 x float> %array_vector19 to <4 x i32>
  %58 = extractelement <4 x i32> %bc51, i32 %49
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %58, i32 1, i32 52, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  %bc52 = bitcast <4 x float> %array_vector23 to <4 x i32>
  %59 = extractelement <4 x i32> %bc52, i32 %49
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %59, i32 1, i32 64, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  %bc53 = bitcast <4 x float> %array_vector27 to <4 x i32>
  %60 = extractelement <4 x i32> %bc53, i32 %49
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %60, i32 1, i32 76, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  %bc54 = bitcast <4 x float> %array_vector31 to <4 x i32>
  %61 = extractelement <4 x i32> %bc54, i32 %49
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %61, i32 1, i32 88, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  call void @llvm.SI.sendmsg(i32 34, i32 %6)
  ; ---- Repetition 3 (driven by %10) ----
  ; Same sequence again; the index %62 is the bitcast of %17.
  %62 = bitcast float %17 to i32
  %array_vector33 = insertelement <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, float %20, i32 1
  %array_vector34 = insertelement <4 x float> %array_vector33, float %24, i32 2
  %array_vector35 = insertelement <4 x float> %array_vector34, float %28, i32 3
  %array_vector37 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %21, i32 1
  %array_vector38 = insertelement <4 x float> %array_vector37, float %25, i32 2
  %array_vector39 = insertelement <4 x float> %array_vector38, float %29, i32 3
  %array_vector41 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %22, i32 1
  %array_vector42 = insertelement <4 x float> %array_vector41, float %26, i32 2
  %array_vector43 = insertelement <4 x float> %array_vector42, float %30, i32 3
  %array_vector45 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %23, i32 1
  %array_vector46 = insertelement <4 x float> %array_vector45, float %27, i32 2
  %array_vector47 = insertelement <4 x float> %array_vector46, float %31, i32 3
  %63 = shl i32 %10, 2
  %64 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %33, i32 %63, i32 4096, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
  %65 = shl i32 %10, 2
  %66 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %33, i32 %65, i32 4352, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
  %67 = shl i32 %10, 2
  %68 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %33, i32 %67, i32 4608, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
  %69 = shl i32 %10, 2
  %70 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %33, i32 %69, i32 4864, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
  call void @llvm.AMDGPU.kill(float 1.000000e+00)
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %64, i32 1, i32 8, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %66, i32 1, i32 20, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %68, i32 1, i32 32, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %70, i32 1, i32 44, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  %bc55 = bitcast <4 x float> %array_vector35 to <4 x i32>
  %71 = extractelement <4 x i32> %bc55, i32 %62
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %71, i32 1, i32 56, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  %bc56 = bitcast <4 x float> %array_vector39 to <4 x i32>
  %72 = extractelement <4 x i32> %bc56, i32 %62
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %72, i32 1, i32 68, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  %bc57 = bitcast <4 x float> %array_vector43 to <4 x i32>
  %73 = extractelement <4 x i32> %bc57, i32 %62
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %73, i32 1, i32 80, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  %bc58 = bitcast <4 x float> %array_vector47 to <4 x i32>
  %74 = extractelement <4 x i32> %bc58, i32 %62
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %35, i32 %74, i32 1, i32 92, i32 %5, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  call void @llvm.SI.sendmsg(i32 34, i32 %6)
  ; One more sendmsg, this time with first operand 3, then return.
  call void @llvm.SI.sendmsg(i32 3, i32 %6)
  ret void
}

; Declarations of the llvm.* functions called by @main in this module, followed
; by the attribute groups (#0-#2) those declarations reference and the empty
; metadata node !0 used by the !amdgpu.uniform and !invariant.load annotations.

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #0

; Function Attrs: nounwind readonly
declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1

; Function Attrs: nounwind
declare void @llvm.AMDGPU.kill(float) #2

; Function Attrs: nounwind
declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) #2

; Function Attrs: nounwind
declare void @llvm.SI.sendmsg(i32, i32) #2

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind }

!0 = !{}
-------------- next part --------------
; Module header of the second attached reproducer; it uses the same module
; name, source filename and target triple as the first attachment.
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

; Reproducer #2: a function using the amdgpu_ps calling convention that returns
; a packed struct of 11 i32 fields followed by 14 float fields (indices 0-24).
; The llc runs quoted in the message used "-march=amdgcn -mcpu=kaveri" with the
; module on stdin and failed in the 'Simple Register Coalescing' pass on
; '@main'.
; NOTE(review): the message does not state which of the two llc failures
; belongs to which attachment; presumably they follow attachment order --
; confirm before relying on it.
;
; Shape of the body: load four constants, load two <8 x i32>/<4 x i32> pairs
; from %2, compute four fs.interp values, assemble two <16 x i32> operands,
; call llvm.SI.image.sample.c.d.o.v16i32 twice, multiply lane 0 of the two
; results, and place the product into the returned struct.
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
  ; Load element 0 of %1 and read four constants (%25-%28) from it.
  %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !invariant.load !0
  %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32)
  %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36)
  %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 48)
  %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 52)
  ; First pair: <8 x i32> %30 is element 0 of %2; <4 x i32> %33 is element 3 of
  ; %2 viewed as an array of <4 x i32>. Lane 0 of %33 is ANDed with lane 7 of
  ; %30 and written back, giving %37.
  %29 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0
  %30 = load <8 x i32>, <8 x i32> addrspace(2)* %29, align 32, !invariant.load !0
  %31 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)*
  %32 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %31, i64 0, i64 3, !amdgpu.uniform !0
  %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !invariant.load !0
  %34 = extractelement <8 x i32> %30, i32 7
  %35 = extractelement <4 x i32> %33, i32 0
  %36 = and i32 %35, %34
  %37 = insertelement <4 x i32> %33, i32 %36, i32 0
  ; Second pair: same pattern with element 2 (<8 x i32> %39) and element 7
  ; (<4 x i32> %42), giving %46.
  %38 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2, !amdgpu.uniform !0
  %39 = load <8 x i32>, <8 x i32> addrspace(2)* %38, align 32, !invariant.load !0
  %40 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)*
  %41 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %40, i64 0, i64 7, !amdgpu.uniform !0
  %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !invariant.load !0
  %43 = extractelement <8 x i32> %39, i32 7
  %44 = extractelement <4 x i32> %42, i32 0
  %45 = and i32 %44, %43
  %46 = insertelement <4 x i32> %42, i32 %45, i32 0
  ; Four fs.interp calls that differ only in their first operand (0-3).
  %47 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8)
  %48 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8)
  %49 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8)
  %50 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8)
  ; %51 and %55 add constants of equal magnitude and opposite sign to %50;
  ; %52-%54 add 0.0 to %47-%49.
  %51 = fadd float %50, 0xBFA99999A0000000
  %52 = fadd float %47, 0.000000e+00
  %53 = fadd float %48, 0.000000e+00
  %54 = fadd float %49, 0.000000e+00
  %55 = fadd float %50, 0x3FA99999A0000000
  ; First <16 x i32> operand: lane 0 is the literal 212739, lanes 1-8 are the
  ; bit patterns of %51, %25-%28 and %47-%49; lanes 9-15 stay undef.
  %56 = bitcast float %51 to i32
  %57 = bitcast float %25 to i32
  %58 = bitcast float %26 to i32
  %59 = bitcast float %27 to i32
  %60 = bitcast float %28 to i32
  %61 = bitcast float %47 to i32
  %62 = bitcast float %48 to i32
  %63 = bitcast float %49 to i32
  %64 = insertelement <16 x i32> <i32 212739, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, i32 %56, i32 1
  %65 = insertelement <16 x i32> %64, i32 %57, i32 2
  %66 = insertelement <16 x i32> %65, i32 %58, i32 3
  %67 = insertelement <16 x i32> %66, i32 %59, i32 4
  %68 = insertelement <16 x i32> %67, i32 %60, i32 5
  %69 = insertelement <16 x i32> %68, i32 %61, i32 6
  %70 = insertelement <16 x i32> %69, i32 %62, i32 7
  %71 = insertelement <16 x i32> %70, i32 %63, i32 8
  %72 = call <4 x float> @llvm.SI.image.sample.c.d.o.v16i32(<16 x i32> %71, <8 x i32> %30, <4 x i32> %37, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %73 = extractelement <4 x float> %72, i32 0
  ; Second <16 x i32> operand: same layout, but lane 1 comes from %55 and lanes
  ; 6-8 from %52-%54; it is used with the second pair (%39, %46).
  %74 = bitcast float %55 to i32
  %75 = bitcast float %25 to i32
  %76 = bitcast float %26 to i32
  %77 = bitcast float %27 to i32
  %78 = bitcast float %28 to i32
  %79 = bitcast float %52 to i32
  %80 = bitcast float %53 to i32
  %81 = bitcast float %54 to i32
  %82 = insertelement <16 x i32> <i32 212739, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, i32 %74, i32 1
  %83 = insertelement <16 x i32> %82, i32 %75, i32 2
  %84 = insertelement <16 x i32> %83, i32 %76, i32 3
  %85 = insertelement <16 x i32> %84, i32 %77, i32 4
  %86 = insertelement <16 x i32> %85, i32 %78, i32 5
  %87 = insertelement <16 x i32> %86, i32 %79, i32 6
  %88 = insertelement <16 x i32> %87, i32 %80, i32 7
  %89 = insertelement <16 x i32> %88, i32 %81, i32 8
  %90 = call <4 x float> @llvm.SI.image.sample.c.d.o.v16i32(<16 x i32> %89, <8 x i32> %39, <4 x i32> %46, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %91 = extractelement <4 x float> %90, i32 0
  ; Multiply lane 0 of the two call results.
  %92 = fmul float %73, %91
  ; Fill the return struct: field 10 is %5 reinterpreted as i32, fields 11-14
  ; all hold %92, field 24 holds %21; every other field stays undef.
  %93 = bitcast float %5 to i32
  %94 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %93, 10
  %95 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %94, float %92, 11
  %96 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %95, float %92, 12
  %97 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %96, float %92, 13
  %98 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %97, float %92, 14
  %99 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %98, float %21, 24
  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %99
}

; Declarations of the llvm.* functions called by @main in this module, followed
; by the attribute groups and the empty metadata node !0 used by the
; !amdgpu.uniform and !invariant.load annotations. Group #0 is attached to
; @main itself; group #1 is attached to all three declarations.

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.image.sample.c.d.o.v16i32(<16 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1

attributes #0 = { "InitialPSInputAddr"="36983" }
attributes #1 = { nounwind readnone }

!0 = !{}