[llvm] r321556 - AMDGPU: Remove mayLoad/hasSideEffects from MIMG stores
Michel Dänzer via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 3 08:18:16 PST 2018
Hi Matt,
On 2017-12-29 06:18 PM, Matt Arsenault via llvm-commits wrote:
> Author: arsenm
> Date: Fri Dec 29 09:18:18 2017
> New Revision: 321556
>
> URL: http://llvm.org/viewvc/llvm-project?rev=321556&view=rev
> Log:
> AMDGPU: Remove mayLoad/hasSideEffects from MIMG stores
>
> Atomics still have hasSideEffects set on them because
> of the mess that is the memory properties.
This change broke the piglit test
"spec@arb_shader_image_load_store@restrict@no qualifier image aliasing
test" for me on Tonga.
I'm attaching the pre-optimization LLVM IR and the good and bad disassembly.
--
Earthling Michel Dänzer | http://www.amd.com
Libre software enthusiast | Mesa and X developer
-------------- next part --------------
; ModuleID = 'tgsi'
source_filename = "tgsi"
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"
; @main -- main part of the pixel shader (pre-optimization IR, ModuleID 'tgsi').
;
; Shape of the function:
;   main_body : one float alloca per TEMP0..TEMP11 / OUT0 component, counter init.
;   loop1     : exit test on the counter kept in %TEMP0.x (reinterpreted as i32).
;   endif5    : loop body -- image load, image store, image load, image store,
;               then counter += 1.
;   endloop19 : interp.mov reads and construction of the returned struct.
;
; Both image loads read through the <8 x i32> descriptor at index 15 of %3 and
; both image stores write through the descriptor at index 14 of %3 (with dword 6
; AND-ed with -2097153, i.e. bit 21 cleared).
; NOTE(review): the second load comes after the first store in program order.
; If the two descriptors can refer to the same memory (the report is about an
; "image aliasing" test), that order is significant -- confirm.
; Function Attrs: alwaysinline
define private amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([12 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
; One stack slot per temporary/output component; all values are carried as float
; and bitcast to i32 wherever integer arithmetic is needed.
%TEMP11.w = alloca float
%TEMP11.z = alloca float
%TEMP11.y = alloca float
%TEMP11.x = alloca float
%TEMP10.w = alloca float
%TEMP10.z = alloca float
%TEMP10.y = alloca float
%TEMP10.x = alloca float
%TEMP9.w = alloca float
%TEMP9.z = alloca float
%TEMP9.y = alloca float
%TEMP9.x = alloca float
%TEMP8.w = alloca float
%TEMP8.z = alloca float
%TEMP8.y = alloca float
%TEMP8.x = alloca float
%TEMP7.w = alloca float
%TEMP7.z = alloca float
%TEMP7.y = alloca float
%TEMP7.x = alloca float
%TEMP6.w = alloca float
%TEMP6.z = alloca float
%TEMP6.y = alloca float
%TEMP6.x = alloca float
%TEMP5.w = alloca float
%TEMP5.z = alloca float
%TEMP5.y = alloca float
%TEMP5.x = alloca float
%TEMP4.w = alloca float
%TEMP4.z = alloca float
%TEMP4.y = alloca float
%TEMP4.x = alloca float
%TEMP3.w = alloca float
%TEMP3.z = alloca float
%TEMP3.y = alloca float
%TEMP3.x = alloca float
%TEMP2.w = alloca float
%TEMP2.z = alloca float
%TEMP2.y = alloca float
%TEMP2.x = alloca float
%TEMP1.w = alloca float
%TEMP1.z = alloca float
%TEMP1.y = alloca float
%TEMP1.x = alloca float
%TEMP0.w = alloca float
%TEMP0.z = alloca float
%TEMP0.y = alloca float
%TEMP0.x = alloca float
%OUT0.w = alloca float
%OUT0.z = alloca float
%OUT0.y = alloca float
%OUT0.x = alloca float
; Loop counter starts at 0 (the all-zero bit pattern).
store float 0.000000e+00, float* %TEMP0.x
br label %loop1
loop1: ; preds = %endif5, %main_body
; Exit test: leave the loop once the counter, read as a signed i32, is >= 768.
; The i1 result is sign-extended, spilled to TEMP1.x and re-tested against 0.
%22 = load float, float* %TEMP0.x
%23 = bitcast float %22 to i32
%24 = icmp sge i32 %23, 768
%25 = sext i1 %24 to i32
%26 = bitcast i32 %25 to float
store float %26, float* %TEMP1.x
%27 = load float, float* %TEMP1.x
%28 = bitcast float %27 to i32
%29 = icmp ne i32 %28, 0
br i1 %29, label %if3, label %endif5
if3: ; preds = %loop1
br label %endloop19
endif5: ; preds = %loop1
; 0x36E0000000000000 is 2^-145, whose single-precision bit pattern is the
; integer 16 -- the same value passed as the first operand of the image loads.
store float 0x36E0000000000000, float* %TEMP2.x
; --- First image load: descriptor index 15 of %3, first operand 16.
; (Presumably operand 1 is the coordinate and the i32 15 is the dmask -- confirm
; against the intrinsic definition.)
%30 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %3, i32 0, i32 15, !amdgpu.uniform !0
%31 = load <8 x i32>, <8 x i32> addrspace(2)* %30, !invariant.load !0
%32 = call nsz <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 16, <8 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false) #1
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
store float %33, float* %TEMP3.x
store float %34, float* %TEMP3.y
store float %35, float* %TEMP3.z
store float %36, float* %TEMP3.w
; TEMP4.x = 2 * counter (also copied to TEMP5.x); used as the first store's i32 operand.
%37 = load float, float* %TEMP0.x
%38 = bitcast float %37 to i32
%39 = mul i32 2, %38
%40 = bitcast i32 %39 to float
store float %40, float* %TEMP4.x
%41 = load float, float* %TEMP4.x
store float %41, float* %TEMP5.x
; TEMP6 = TEMP3 + 1, component-wise integer add on the loaded texel.
%42 = load float, float* %TEMP3.x
%43 = bitcast float %42 to i32
%44 = add i32 %43, 1
%45 = load float, float* %TEMP3.y
%46 = bitcast float %45 to i32
%47 = add i32 %46, 1
%48 = load float, float* %TEMP3.z
%49 = bitcast float %48 to i32
%50 = add i32 %49, 1
%51 = load float, float* %TEMP3.w
%52 = bitcast float %51 to i32
%53 = add i32 %52, 1
%54 = bitcast i32 %44 to float
store float %54, float* %TEMP6.x
%55 = bitcast i32 %47 to float
store float %55, float* %TEMP6.y
%56 = bitcast i32 %50 to float
store float %56, float* %TEMP6.z
%57 = bitcast i32 %53 to float
store float %57, float* %TEMP6.w
%58 = load float, float* %TEMP6.x
%59 = load float, float* %TEMP6.y
%60 = load float, float* %TEMP6.z
%61 = load float, float* %TEMP6.w
%62 = insertelement <4 x float> undef, float %58, i32 0
%63 = insertelement <4 x float> %62, float %59, i32 1
%64 = insertelement <4 x float> %63, float %60, i32 2
%65 = insertelement <4 x float> %64, float %61, i32 3
; --- First image store: descriptor index 14 of %3 with bit 21 of dword 6
; cleared (-2097153 == ~(1 << 21)); i32 operand is 2 * counter.
%66 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %3, i32 0, i32 14, !amdgpu.uniform !0
%67 = load <8 x i32>, <8 x i32> addrspace(2)* %66, !invariant.load !0
%68 = extractelement <8 x i32> %67, i32 6
%69 = and i32 %68, -2097153
%70 = insertelement <8 x i32> %67, i32 %69, i32 6
%71 = load float, float* %TEMP4.x
%72 = bitcast float %71 to i32
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %65, i32 %72, <8 x i32> %70, i32 15, i1 false, i1 false, i1 false, i1 false) #2
store float 0x36E0000000000000, float* %TEMP7.x
; --- Second image load: same descriptor (index 15) and same first operand (16)
; as the first load, issued AFTER the first store in program order.
%73 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %3, i32 0, i32 15, !amdgpu.uniform !0
%74 = load <8 x i32>, <8 x i32> addrspace(2)* %73, !invariant.load !0
%75 = call nsz <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 16, <8 x i32> %74, i32 15, i1 false, i1 false, i1 false, i1 false) #1
%76 = extractelement <4 x float> %75, i32 0
%77 = extractelement <4 x float> %75, i32 1
%78 = extractelement <4 x float> %75, i32 2
%79 = extractelement <4 x float> %75, i32 3
store float %76, float* %TEMP8.x
store float %77, float* %TEMP8.y
store float %78, float* %TEMP8.z
store float %79, float* %TEMP8.w
; TEMP9.x = 2 * counter + 1 (also copied to TEMP10.x); used as the second store's i32 operand.
%80 = load float, float* %TEMP0.x
%81 = bitcast float %80 to i32
%82 = mul i32 2, %81
%83 = add i32 %82, 1
%84 = bitcast i32 %83 to float
store float %84, float* %TEMP9.x
%85 = load float, float* %TEMP9.x
store float %85, float* %TEMP10.x
; TEMP11 = TEMP8 - 1, component-wise integer add of -1 on the second texel.
%86 = load float, float* %TEMP8.x
%87 = bitcast float %86 to i32
%88 = add i32 %87, -1
%89 = load float, float* %TEMP8.y
%90 = bitcast float %89 to i32
%91 = add i32 %90, -1
%92 = load float, float* %TEMP8.z
%93 = bitcast float %92 to i32
%94 = add i32 %93, -1
%95 = load float, float* %TEMP8.w
%96 = bitcast float %95 to i32
%97 = add i32 %96, -1
%98 = bitcast i32 %88 to float
store float %98, float* %TEMP11.x
%99 = bitcast i32 %91 to float
store float %99, float* %TEMP11.y
%100 = bitcast i32 %94 to float
store float %100, float* %TEMP11.z
%101 = bitcast i32 %97 to float
store float %101, float* %TEMP11.w
%102 = load float, float* %TEMP11.x
%103 = load float, float* %TEMP11.y
%104 = load float, float* %TEMP11.z
%105 = load float, float* %TEMP11.w
%106 = insertelement <4 x float> undef, float %102, i32 0
%107 = insertelement <4 x float> %106, float %103, i32 1
%108 = insertelement <4 x float> %107, float %104, i32 2
%109 = insertelement <4 x float> %108, float %105, i32 3
; --- Second image store: same descriptor handling as the first store
; (index 14, bit 21 of dword 6 cleared); i32 operand is 2 * counter + 1.
%110 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %3, i32 0, i32 14, !amdgpu.uniform !0
%111 = load <8 x i32>, <8 x i32> addrspace(2)* %110, !invariant.load !0
%112 = extractelement <8 x i32> %111, i32 6
%113 = and i32 %112, -2097153
%114 = insertelement <8 x i32> %111, i32 %113, i32 6
%115 = load float, float* %TEMP9.x
%116 = bitcast float %115 to i32
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %109, i32 %116, <8 x i32> %114, i32 15, i1 false, i1 false, i1 false, i1 false) #2
; counter += 1, then back to the loop header.
%117 = load float, float* %TEMP0.x
%118 = bitcast float %117 to i32
%119 = add i32 %118, 1
%120 = bitcast i32 %119 to float
store float %120, float* %TEMP0.x
br label %loop1
endloop19: ; preds = %if3
; Sixteen interp.mov calls (second operand cycling 0..3, last operand %5);
; only %121, %126, %131 and %136 are stored to OUT0 below, the rest are unused.
%121 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #7
%122 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %5) #7
%123 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %5) #7
%124 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %5) #7
%125 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #7
%126 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %5) #7
%127 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %5) #7
%128 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %5) #7
%129 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #7
%130 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %5) #7
%131 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %5) #7
%132 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %5) #7
%133 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #7
%134 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %5) #7
%135 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %5) #7
%136 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %5) #7
store float %121, float* %OUT0.x
store float %126, float* %OUT0.y
store float %131, float* %OUT0.z
store float %136, float* %OUT0.w
%137 = load float, float* %OUT0.x
%138 = load float, float* %OUT0.y
%139 = load float, float* %OUT0.z
%140 = load float, float* %OUT0.w
; Return struct: field 8 = bits of %4, fields 9..12 = OUT0.xyzw, field 23 = %20;
; every other field is left undef.
%141 = bitcast float %4 to i32
%142 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %141, 8
%143 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %142, float %137, 9
%144 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %143, float %138, 10
%145 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %144, float %139, 11
%146 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %145, float %140, 12
%147 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %146, float %20, 23
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %147
}
; Function Attrs: nounwind readonly
declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1
; Function Attrs: nounwind writeonly
declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #2
; Function Attrs: nounwind readnone speculatable
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #3
; @ps_epilog -- pixel-shader epilog part.
; Takes four inreg i64 values, one inreg float and fifteen floats, but only
; reads %5 (the first non-inreg float): it is passed as the first data operand
; of a single llvm.amdgcn.exp.f32 call whose other three data operands are undef.
; The leading i32 0 / i32 1 and trailing i1 true / i1 true are presumably the
; export target, enable mask, "done" and "vm" flags -- confirm against the
; intrinsic definition.
; Function Attrs: alwaysinline
define private amdgpu_ps void @ps_epilog(i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #4 {
main_body:
call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %5, float undef, float undef, float undef, i1 true, i1 true) #5
ret void
}
; Function Attrs: nounwind
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #5
; @wrapper -- the externally visible pixel-shader entry point; it glues the
; private @main and @ps_epilog parts together.
;
; Steps:
;   1. Split every incoming argument into 32-bit scalars (pointers via
;      ptrtoint + <2 x i32>, integer vectors via bitcast to float vectors).
;   2. Rebuild values of the exact types @main expects from those scalars.
;      Each split/rebuild pair is a round trip that reproduces the original value.
;   3. Call @main and extract all 24 fields of the returned struct.
;   4. Pack fields 0..7 into four i64 values, reinterpret field 8 as float, and
;      pass them together with fields 9..23 to @ps_epilog.
define amdgpu_ps void @wrapper([12 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #6 {
main_body:
; --- Step 1: split the four pointer arguments %0..%3 into low/high i32 halves.
%22 = ptrtoint [12 x <4 x i32>] addrspace(2)* %0 to i64
%23 = bitcast i64 %22 to <2 x i32>
%24 = extractelement <2 x i32> %23, i32 0
%25 = extractelement <2 x i32> %23, i32 1
%26 = ptrtoint [0 x <8 x i32>] addrspace(2)* %1 to i64
%27 = bitcast i64 %26 to <2 x i32>
%28 = extractelement <2 x i32> %27, i32 0
%29 = extractelement <2 x i32> %27, i32 1
%30 = ptrtoint [0 x <4 x i32>] addrspace(2)* %2 to i64
%31 = bitcast i64 %30 to <2 x i32>
%32 = extractelement <2 x i32> %31, i32 0
%33 = extractelement <2 x i32> %31, i32 1
%34 = ptrtoint [80 x <8 x i32>] addrspace(2)* %3 to i64
%35 = bitcast i64 %34 to <2 x i32>
%36 = extractelement <2 x i32> %35, i32 0
%37 = extractelement <2 x i32> %35, i32 1
; Scalar and vector arguments %4, %6..%12, %18, %19, %21 are reinterpreted and
; split into individual 32-bit values.
%38 = bitcast float %4 to i32
%39 = bitcast <2 x i32> %6 to <2 x float>
%40 = extractelement <2 x float> %39, i32 0
%41 = extractelement <2 x float> %39, i32 1
%42 = bitcast <2 x i32> %7 to <2 x float>
%43 = extractelement <2 x float> %42, i32 0
%44 = extractelement <2 x float> %42, i32 1
%45 = bitcast <2 x i32> %8 to <2 x float>
%46 = extractelement <2 x float> %45, i32 0
%47 = extractelement <2 x float> %45, i32 1
%48 = bitcast <3 x i32> %9 to <3 x float>
%49 = extractelement <3 x float> %48, i32 0
%50 = extractelement <3 x float> %48, i32 1
%51 = extractelement <3 x float> %48, i32 2
%52 = bitcast <2 x i32> %10 to <2 x float>
%53 = extractelement <2 x float> %52, i32 0
%54 = extractelement <2 x float> %52, i32 1
%55 = bitcast <2 x i32> %11 to <2 x float>
%56 = extractelement <2 x float> %55, i32 0
%57 = extractelement <2 x float> %55, i32 1
%58 = bitcast <2 x i32> %12 to <2 x float>
%59 = extractelement <2 x float> %58, i32 0
%60 = extractelement <2 x float> %58, i32 1
%61 = bitcast i32 %18 to float
%62 = bitcast i32 %19 to float
%63 = bitcast i32 %21 to float
; --- Step 2: reassemble the pointers from their two halves ...
%64 = insertelement <2 x i32> undef, i32 %24, i32 0
%65 = insertelement <2 x i32> %64, i32 %25, i32 1
%66 = bitcast <2 x i32> %65 to i64
%67 = inttoptr i64 %66 to [12 x <4 x i32>] addrspace(2)*
%68 = insertelement <2 x i32> undef, i32 %28, i32 0
%69 = insertelement <2 x i32> %68, i32 %29, i32 1
%70 = bitcast <2 x i32> %69 to i64
%71 = inttoptr i64 %70 to [0 x <8 x i32>] addrspace(2)*
%72 = insertelement <2 x i32> undef, i32 %32, i32 0
%73 = insertelement <2 x i32> %72, i32 %33, i32 1
%74 = bitcast <2 x i32> %73 to i64
%75 = inttoptr i64 %74 to [0 x <4 x i32>] addrspace(2)*
%76 = insertelement <2 x i32> undef, i32 %36, i32 0
%77 = insertelement <2 x i32> %76, i32 %37, i32 1
%78 = bitcast <2 x i32> %77 to i64
%79 = inttoptr i64 %78 to [80 x <8 x i32>] addrspace(2)*
; ... and rebuild the scalar/vector arguments in the types @main declares.
%80 = bitcast i32 %38 to float
%81 = insertelement <2 x float> undef, float %40, i32 0
%82 = insertelement <2 x float> %81, float %41, i32 1
%83 = bitcast <2 x float> %82 to <2 x i32>
%84 = insertelement <2 x float> undef, float %43, i32 0
%85 = insertelement <2 x float> %84, float %44, i32 1
%86 = bitcast <2 x float> %85 to <2 x i32>
%87 = insertelement <2 x float> undef, float %46, i32 0
%88 = insertelement <2 x float> %87, float %47, i32 1
%89 = bitcast <2 x float> %88 to <2 x i32>
%90 = insertelement <3 x float> undef, float %49, i32 0
%91 = insertelement <3 x float> %90, float %50, i32 1
%92 = insertelement <3 x float> %91, float %51, i32 2
%93 = bitcast <3 x float> %92 to <3 x i32>
%94 = insertelement <2 x float> undef, float %53, i32 0
%95 = insertelement <2 x float> %94, float %54, i32 1
%96 = bitcast <2 x float> %95 to <2 x i32>
%97 = insertelement <2 x float> undef, float %56, i32 0
%98 = insertelement <2 x float> %97, float %57, i32 1
%99 = bitcast <2 x float> %98 to <2 x i32>
%100 = insertelement <2 x float> undef, float %59, i32 0
%101 = insertelement <2 x float> %100, float %60, i32 1
%102 = bitcast <2 x float> %101 to <2 x i32>
%103 = bitcast float %61 to i32
%104 = bitcast float %62 to i32
%105 = bitcast float %63 to i32
; --- Step 3: run the main shader part; %5, %13..%17 and %20 are forwarded as-is.
%106 = call <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([12 x <4 x i32>] addrspace(2)* %67, [0 x <8 x i32>] addrspace(2)* %71, [0 x <4 x i32>] addrspace(2)* %75, [80 x <8 x i32>] addrspace(2)* %79, float %80, i32 %5, <2 x i32> %83, <2 x i32> %86, <2 x i32> %89, <3 x i32> %93, <2 x i32> %96, <2 x i32> %99, <2 x i32> %102, float %13, float %14, float %15, float %16, float %17, i32 %103, i32 %104, float %20, i32 %105)
; Unpack all 24 fields of @main's result (%107 = field 0 ... %130 = field 23).
%107 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 0
%108 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 1
%109 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 2
%110 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 3
%111 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 4
%112 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 5
%113 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 6
%114 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 7
%115 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 8
%116 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 9
%117 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 10
%118 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 11
%119 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 12
%120 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 13
%121 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 14
%122 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 15
%123 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 16
%124 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 17
%125 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 18
%126 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 19
%127 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 20
%128 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 21
%129 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 22
%130 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 23
; --- Step 4: pair up fields 0..7 into four i64 values for the epilog's i64 parameters.
%131 = insertelement <2 x i32> undef, i32 %107, i32 0
%132 = insertelement <2 x i32> %131, i32 %108, i32 1
%133 = bitcast <2 x i32> %132 to i64
%134 = insertelement <2 x i32> undef, i32 %109, i32 0
%135 = insertelement <2 x i32> %134, i32 %110, i32 1
%136 = bitcast <2 x i32> %135 to i64
%137 = insertelement <2 x i32> undef, i32 %111, i32 0
%138 = insertelement <2 x i32> %137, i32 %112, i32 1
%139 = bitcast <2 x i32> %138 to i64
%140 = insertelement <2 x i32> undef, i32 %113, i32 0
%141 = insertelement <2 x i32> %140, i32 %114, i32 1
%142 = bitcast <2 x i32> %141 to i64
; Field 8 is an i32 in the struct but the epilog takes it as a float.
%143 = bitcast i32 %115 to float
call void @ps_epilog(i64 %133, i64 %136, i64 %139, i64 %142, float %143, float %116, float %117, float %118, float %119, float %120, float %121, float %122, float %123, float %124, float %125, float %126, float %127, float %128, float %129, float %130)
ret void
}
attributes #0 = { alwaysinline "no-signed-zeros-fp-math"="true" }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind writeonly }
attributes #3 = { nounwind readnone speculatable }
attributes #4 = { alwaysinline "InitialPSInputAddr"="16777215" "no-signed-zeros-fp-math"="true" }
attributes #5 = { nounwind }
attributes #6 = { "no-signed-zeros-fp-math"="true" }
attributes #7 = { nounwind readnone }
!0 = !{}
radeonsi: Compiling shader 6
SHADER KEY
part.ps.prolog.color_two_side = 0
part.ps.prolog.flatshade_colors = 0
part.ps.prolog.poly_stipple = 0
part.ps.prolog.force_persp_sample_interp = 0
part.ps.prolog.force_linear_sample_interp = 0
part.ps.prolog.force_persp_center_interp = 0
part.ps.prolog.force_linear_center_interp = 0
part.ps.prolog.bc_optimize_for_persp = 0
part.ps.prolog.bc_optimize_for_linear = 0
part.ps.epilog.spi_shader_col_format = 0x1
part.ps.epilog.color_is_int8 = 0x0
part.ps.epilog.color_is_int10 = 0x0
part.ps.epilog.last_cbuf = 0
part.ps.epilog.alpha_func = 7
part.ps.epilog.alpha_to_one = 0
part.ps.epilog.poly_line_smoothing = 0
part.ps.epilog.clamp_color = 0
Pixel Shader:
Shader main disassembly:
; Compiled pixel shader (AMDGPU disassembly; trailing "; XXXXXXXX" fields are
; the instruction encodings printed by the disassembler).
;
; Register roles, as established by the instructions below:
;   s[12:19] : 8-dword descriptor loaded from s[6:7] + 0x1e0; used by both image_load
;   s[20:27] : 8-dword descriptor loaded from s[6:7] + 0x1c0; used by both image_store
;              (s26, its dword 6, is AND-ed with 0xffdfffff, clearing bit 21)
;   s0       : loop counter, starts at 0, +1 per iteration
;   s1       : starts at 1, +2 per iteration (second store's address operand)
;   s2       : s1 - 1 (first store's address operand)
;   v0       : constant 16, the address operand of both image_load
;
; NOTE(review): in BB0_1 both image_load instructions are issued before either
; image_store. If the two descriptors can address the same memory, the second
; load no longer observes the first store -- presumably this is the "bad"
; output described in the report; confirm against the other disassembly.
wrapper:
BB0_0:
s_load_dwordx8 s[20:27], s[6:7], 0x1c0 ; C00E0503 000001C0
s_load_dwordx8 s[12:19], s[6:7], 0x1e0 ; C00E0303 000001E0
s_mov_b32 s0, 0 ; BE800080
s_mov_b32 s1, 1 ; BE810081
v_mov_b32_e32 v0, 16 ; 7E000290
; Wait for the two scalar descriptor loads before modifying s26.
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_b32 s26, s26, 0xffdfffff ; 861AFF1A FFDFFFFF
; Loop entry test: skip the loop if s0 > 0x2ff (767).
s_cmpk_gt_i32 s0, 0x2ff ; B20002FF
s_cbranch_scc1 BB0_2 ; BF850000
BB0_1:
; Both loads read the same descriptor at the same address operand (v0).
image_load v[1:4], v0, s[12:19] dmask:0xf unorm ; F0001F00 00030100
image_load v[5:8], v0, s[12:19] dmask:0xf unorm ; F0001F00 00030500
; Store address operands are captured before s1 is advanced: v10 = s1 - 1, v9 = s1.
s_add_i32 s2, s1, -1 ; 8102C101
v_mov_b32_e32 v9, s1 ; 7E120201
v_mov_b32_e32 v10, s2 ; 7E140202
s_add_i32 s0, s0, 1 ; 81008100
s_add_i32 s1, s1, 2 ; 81018201
; vmcnt(1): at most one load still outstanding, so the first load's result
; v[1:4] is ready; add 1 to each component.
s_waitcnt vmcnt(1) ; BF8C0F71
v_add_u32_e32 v1, vcc, 1, v1 ; 32020281
v_add_u32_e32 v2, vcc, 1, v2 ; 32040481
v_add_u32_e32 v3, vcc, 1, v3 ; 32060681
v_add_u32_e32 v4, vcc, 1, v4 ; 32080881
; vmcnt(0): second load's result v[5:8] is ready; add -1 to each component.
s_waitcnt vmcnt(0) ; BF8C0F70
v_add_u32_e32 v5, vcc, -1, v5 ; 320A0AC1
v_add_u32_e32 v6, vcc, -1, v6 ; 320C0CC1
v_add_u32_e32 v7, vcc, -1, v7 ; 320E0EC1
v_add_u32_e32 v8, vcc, -1, v8 ; 321010C1
; Both stores are issued only here, after both loads have completed.
image_store v[1:4], v10, s[20:27] dmask:0xf unorm ; F0201F00 0005010A
image_store v[5:8], v9, s[20:27] dmask:0xf unorm ; F0201F00 00050509
; Loop back while s0 <= 0x2ff (767).
s_cmpk_gt_i32 s0, 0x2ff ; B20002FF
s_cbranch_scc0 BB0_1 ; BF840000
BB0_2:
; Epilog: read attr0.x with v_interp_mov (m0 set from s9) and export it to mrt0.
s_mov_b32 m0, s9 ; BEFC0009
v_interp_mov_f32 v0, p0, attr0.x ; D4020002
exp mrt0 v0, off, off, off done vm ; C4001801 00000000
s_endpgm ; BF810000
*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0x0001
SPI_PS_INPUT_ENA = 0x0001
*** SHADER STATS ***
SGPRS: 96
VGPRS: 12
Spilled SGPRs: 0
Spilled VGPRs: 0
Private memory VGPRs: 0
Code Size: 168 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 8
********************
FRAG
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
DCL IMAGE[0], 1D, PIPE_FORMAT_R32_UINT, WR
DCL IMAGE[1], 1D, PIPE_FORMAT_R32_UINT, WR
DCL TEMP[0..11], LOCAL
IMM[0] INT32 {0, 768, 16, 2}
IMM[1] UINT32 {1, 4294967295, 0, 0}
IMM[2] INT32 {1, 0, 0, 0}
0: MOV TEMP[0].x, IMM[0].xxxx
1: BGNLOOP
2: ISGE TEMP[1].x, TEMP[0].xxxx, IMM[0].yyyy
3: UIF TEMP[1].xxxx
4: BRK
5: ENDIF
6: MOV TEMP[2].x, IMM[0].zzzz
7: LOAD TEMP[3], IMAGE[0], IMM[0].zzzz, RESTRICT, 1D, PIPE_FORMAT_R32_UINT
8: UMUL TEMP[4].x, IMM[0].wwww, TEMP[0].xxxx
9: MOV TEMP[5].x, TEMP[4].xxxx
10: UADD TEMP[6], TEMP[3], IMM[1].xxxx
11: STORE IMAGE[1], TEMP[4].xxxx, TEMP[6], RESTRICT, 1D, PIPE_FORMAT_R32_UINT
12: MOV TEMP[7].x, IMM[0].zzzz
13: LOAD TEMP[8], IMAGE[0], IMM[0].zzzz, RESTRICT, 1D, PIPE_FORMAT_R32_UINT
14: UMAD TEMP[9].x, IMM[0].wwww, TEMP[0].xxxx, IMM[2].xxxx
15: MOV TEMP[10].x, TEMP[9].xxxx
16: UADD TEMP[11], TEMP[8], IMM[1].yyyy
17: STORE IMAGE[1], TEMP[9].xxxx, TEMP[11], RESTRICT, 1D, PIPE_FORMAT_R32_UINT
18: UADD TEMP[0].x, TEMP[0].xxxx, IMM[2].xxxx
19: ENDLOOP
20: MOV OUT[0], IN[0]
21: END
; ModuleID = 'tgsi'
source_filename = "tgsi"
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"
; Function Attrs: alwaysinline
define private amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([12 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
%TEMP11.w = alloca float
%TEMP11.z = alloca float
%TEMP11.y = alloca float
%TEMP11.x = alloca float
%TEMP10.w = alloca float
%TEMP10.z = alloca float
%TEMP10.y = alloca float
%TEMP10.x = alloca float
%TEMP9.w = alloca float
%TEMP9.z = alloca float
%TEMP9.y = alloca float
%TEMP9.x = alloca float
%TEMP8.w = alloca float
%TEMP8.z = alloca float
%TEMP8.y = alloca float
%TEMP8.x = alloca float
%TEMP7.w = alloca float
%TEMP7.z = alloca float
%TEMP7.y = alloca float
%TEMP7.x = alloca float
%TEMP6.w = alloca float
%TEMP6.z = alloca float
%TEMP6.y = alloca float
%TEMP6.x = alloca float
%TEMP5.w = alloca float
%TEMP5.z = alloca float
%TEMP5.y = alloca float
%TEMP5.x = alloca float
%TEMP4.w = alloca float
%TEMP4.z = alloca float
%TEMP4.y = alloca float
%TEMP4.x = alloca float
%TEMP3.w = alloca float
%TEMP3.z = alloca float
%TEMP3.y = alloca float
%TEMP3.x = alloca float
%TEMP2.w = alloca float
%TEMP2.z = alloca float
%TEMP2.y = alloca float
%TEMP2.x = alloca float
%TEMP1.w = alloca float
%TEMP1.z = alloca float
%TEMP1.y = alloca float
%TEMP1.x = alloca float
%TEMP0.w = alloca float
%TEMP0.z = alloca float
%TEMP0.y = alloca float
%TEMP0.x = alloca float
%OUT0.w = alloca float
%OUT0.z = alloca float
%OUT0.y = alloca float
%OUT0.x = alloca float
store float 0.000000e+00, float* %TEMP0.x
br label %loop1
loop1: ; preds = %endif5, %main_body
%22 = load float, float* %TEMP0.x
%23 = bitcast float %22 to i32
%24 = icmp sge i32 %23, 768
%25 = sext i1 %24 to i32
%26 = bitcast i32 %25 to float
store float %26, float* %TEMP1.x
%27 = load float, float* %TEMP1.x
%28 = bitcast float %27 to i32
%29 = icmp ne i32 %28, 0
br i1 %29, label %if3, label %endif5
if3: ; preds = %loop1
br label %endloop19
endif5: ; preds = %loop1
store float 0x36E0000000000000, float* %TEMP2.x
%30 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %3, i32 0, i32 15, !amdgpu.uniform !0
%31 = load <8 x i32>, <8 x i32> addrspace(2)* %30, !invariant.load !0
%32 = call nsz <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 16, <8 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false) #7
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
store float %33, float* %TEMP3.x
store float %34, float* %TEMP3.y
store float %35, float* %TEMP3.z
store float %36, float* %TEMP3.w
%37 = load float, float* %TEMP0.x
%38 = bitcast float %37 to i32
%39 = mul i32 2, %38
%40 = bitcast i32 %39 to float
store float %40, float* %TEMP4.x
%41 = load float, float* %TEMP4.x
store float %41, float* %TEMP5.x
%42 = load float, float* %TEMP3.x
%43 = bitcast float %42 to i32
%44 = add i32 %43, 1
%45 = load float, float* %TEMP3.y
%46 = bitcast float %45 to i32
%47 = add i32 %46, 1
%48 = load float, float* %TEMP3.z
%49 = bitcast float %48 to i32
%50 = add i32 %49, 1
%51 = load float, float* %TEMP3.w
%52 = bitcast float %51 to i32
%53 = add i32 %52, 1
%54 = bitcast i32 %44 to float
store float %54, float* %TEMP6.x
%55 = bitcast i32 %47 to float
store float %55, float* %TEMP6.y
%56 = bitcast i32 %50 to float
store float %56, float* %TEMP6.z
%57 = bitcast i32 %53 to float
store float %57, float* %TEMP6.w
%58 = load float, float* %TEMP6.x
%59 = load float, float* %TEMP6.y
%60 = load float, float* %TEMP6.z
%61 = load float, float* %TEMP6.w
%62 = insertelement <4 x float> undef, float %58, i32 0
%63 = insertelement <4 x float> %62, float %59, i32 1
%64 = insertelement <4 x float> %63, float %60, i32 2
%65 = insertelement <4 x float> %64, float %61, i32 3
%66 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %3, i32 0, i32 14, !amdgpu.uniform !0
%67 = load <8 x i32>, <8 x i32> addrspace(2)* %66, !invariant.load !0
%68 = extractelement <8 x i32> %67, i32 6
%69 = and i32 %68, -2097153
%70 = insertelement <8 x i32> %67, i32 %69, i32 6
%71 = load float, float* %TEMP4.x
%72 = bitcast float %71 to i32
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %65, i32 %72, <8 x i32> %70, i32 15, i1 false, i1 false, i1 false, i1 false) #8
store float 0x36E0000000000000, float* %TEMP7.x
%73 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %3, i32 0, i32 15, !amdgpu.uniform !0
%74 = load <8 x i32>, <8 x i32> addrspace(2)* %73, !invariant.load !0
%75 = call nsz <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 16, <8 x i32> %74, i32 15, i1 false, i1 false, i1 false, i1 false) #7
%76 = extractelement <4 x float> %75, i32 0
%77 = extractelement <4 x float> %75, i32 1
%78 = extractelement <4 x float> %75, i32 2
%79 = extractelement <4 x float> %75, i32 3
store float %76, float* %TEMP8.x
store float %77, float* %TEMP8.y
store float %78, float* %TEMP8.z
store float %79, float* %TEMP8.w
%80 = load float, float* %TEMP0.x
%81 = bitcast float %80 to i32
%82 = mul i32 2, %81
%83 = add i32 %82, 1
%84 = bitcast i32 %83 to float
store float %84, float* %TEMP9.x
%85 = load float, float* %TEMP9.x
store float %85, float* %TEMP10.x
%86 = load float, float* %TEMP8.x
%87 = bitcast float %86 to i32
%88 = add i32 %87, -1
%89 = load float, float* %TEMP8.y
%90 = bitcast float %89 to i32
%91 = add i32 %90, -1
%92 = load float, float* %TEMP8.z
%93 = bitcast float %92 to i32
%94 = add i32 %93, -1
%95 = load float, float* %TEMP8.w
%96 = bitcast float %95 to i32
%97 = add i32 %96, -1
%98 = bitcast i32 %88 to float
store float %98, float* %TEMP11.x
%99 = bitcast i32 %91 to float
store float %99, float* %TEMP11.y
%100 = bitcast i32 %94 to float
store float %100, float* %TEMP11.z
%101 = bitcast i32 %97 to float
store float %101, float* %TEMP11.w
%102 = load float, float* %TEMP11.x
%103 = load float, float* %TEMP11.y
%104 = load float, float* %TEMP11.z
%105 = load float, float* %TEMP11.w
%106 = insertelement <4 x float> undef, float %102, i32 0
%107 = insertelement <4 x float> %106, float %103, i32 1
%108 = insertelement <4 x float> %107, float %104, i32 2
%109 = insertelement <4 x float> %108, float %105, i32 3
%110 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %3, i32 0, i32 14, !amdgpu.uniform !0
%111 = load <8 x i32>, <8 x i32> addrspace(2)* %110, !invariant.load !0
%112 = extractelement <8 x i32> %111, i32 6
%113 = and i32 %112, -2097153
%114 = insertelement <8 x i32> %111, i32 %113, i32 6
%115 = load float, float* %TEMP9.x
%116 = bitcast float %115 to i32
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %109, i32 %116, <8 x i32> %114, i32 15, i1 false, i1 false, i1 false, i1 false) #8
%117 = load float, float* %TEMP0.x
%118 = bitcast float %117 to i32
%119 = add i32 %118, 1
%120 = bitcast i32 %119 to float
store float %120, float* %TEMP0.x
br label %loop1
endloop19: ; preds = %if3
%121 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #7
%122 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %5) #7
%123 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %5) #7
%124 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %5) #7
%125 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #7
%126 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %5) #7
%127 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %5) #7
%128 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %5) #7
%129 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #7
%130 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %5) #7
%131 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %5) #7
%132 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %5) #7
%133 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #7
%134 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %5) #7
%135 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %5) #7
%136 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %5) #7
store float %121, float* %OUT0.x
store float %126, float* %OUT0.y
store float %131, float* %OUT0.z
store float %136, float* %OUT0.w
%137 = load float, float* %OUT0.x
%138 = load float, float* %OUT0.y
%139 = load float, float* %OUT0.z
%140 = load float, float* %OUT0.w
%141 = bitcast float %4 to i32
%142 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %141, 8
%143 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %142, float %137, 9
%144 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %143, float %138, 10
%145 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %144, float %139, 11
%146 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %145, float %140, 12
%147 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %146, float %20, 23
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %147
}
; Intrinsics called from @main: image load, image store and interp.mov.
; The attribute groups below are the declaration-level ones; the call sites
; visible in @main attach their own groups (#7 on the image.load and
; interp.mov calls, #8 on the image.store calls).
; Function Attrs: nounwind readonly
declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1
; Function Attrs: nounwind writeonly
declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #2
; Function Attrs: nounwind readnone speculatable
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #3
; Function Attrs: alwaysinline
; Pixel-shader epilog: issues one llvm.amdgcn.exp.f32 call and returns.
; Parameters are unnamed (%0-%19): four i64 inreg (%0-%3), one float inreg
; (%4), then fifteen plain floats (%5-%19). Only %5 is read here; the other
; three exported channels are undef.
define private amdgpu_ps void @ps_epilog(i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #4 {
main_body:
; NOTE(review): the leading i32 0 / i32 1 are presumably the export target
; and channel-enable mask, and the two i1 true flags "done"/"vm" -- confirm
; against the intrinsic documentation.
call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %5, float undef, float undef, float undef, i1 true, i1 true) #5
ret void
}
; Export intrinsic; its only call site in this module is in @ps_epilog.
; Function Attrs: nounwind
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #5
; Non-private amdgpu_ps function that glues the shader parts together:
; it repacks its 22 unnamed arguments (%0-%21), calls @main, unpacks the
; 24-field struct @main returns, and forwards the pieces to @ps_epilog.
; Most of the body is value-preserving round trips (ptrtoint/inttoptr,
; bitcast, extractelement/insertelement); no arithmetic is performed.
define amdgpu_ps void @wrapper([12 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #6 {
main_body:
; --- Split the four 64-bit pointer arguments (%0-%3) into pairs of i32. ---
%22 = ptrtoint [12 x <4 x i32>] addrspace(2)* %0 to i64
%23 = bitcast i64 %22 to <2 x i32>
%24 = extractelement <2 x i32> %23, i32 0
%25 = extractelement <2 x i32> %23, i32 1
%26 = ptrtoint [0 x <8 x i32>] addrspace(2)* %1 to i64
%27 = bitcast i64 %26 to <2 x i32>
%28 = extractelement <2 x i32> %27, i32 0
%29 = extractelement <2 x i32> %27, i32 1
%30 = ptrtoint [0 x <4 x i32>] addrspace(2)* %2 to i64
%31 = bitcast i64 %30 to <2 x i32>
%32 = extractelement <2 x i32> %31, i32 0
%33 = extractelement <2 x i32> %31, i32 1
%34 = ptrtoint [80 x <8 x i32>] addrspace(2)* %3 to i64
%35 = bitcast i64 %34 to <2 x i32>
%36 = extractelement <2 x i32> %35, i32 0
%37 = extractelement <2 x i32> %35, i32 1
; --- Flatten the remaining arguments into scalars. ---
; %4 (float) is viewed as i32; the integer vectors %6-%12 are viewed as
; float vectors and split per element; %18, %19, %21 (i32) are viewed as float.
%38 = bitcast float %4 to i32
%39 = bitcast <2 x i32> %6 to <2 x float>
%40 = extractelement <2 x float> %39, i32 0
%41 = extractelement <2 x float> %39, i32 1
%42 = bitcast <2 x i32> %7 to <2 x float>
%43 = extractelement <2 x float> %42, i32 0
%44 = extractelement <2 x float> %42, i32 1
%45 = bitcast <2 x i32> %8 to <2 x float>
%46 = extractelement <2 x float> %45, i32 0
%47 = extractelement <2 x float> %45, i32 1
%48 = bitcast <3 x i32> %9 to <3 x float>
%49 = extractelement <3 x float> %48, i32 0
%50 = extractelement <3 x float> %48, i32 1
%51 = extractelement <3 x float> %48, i32 2
%52 = bitcast <2 x i32> %10 to <2 x float>
%53 = extractelement <2 x float> %52, i32 0
%54 = extractelement <2 x float> %52, i32 1
%55 = bitcast <2 x i32> %11 to <2 x float>
%56 = extractelement <2 x float> %55, i32 0
%57 = extractelement <2 x float> %55, i32 1
%58 = bitcast <2 x i32> %12 to <2 x float>
%59 = extractelement <2 x float> %58, i32 0
%60 = extractelement <2 x float> %58, i32 1
%61 = bitcast i32 %18 to float
%62 = bitcast i32 %19 to float
%63 = bitcast i32 %21 to float
; --- Rebuild the four pointers from their i32 halves. ---
%64 = insertelement <2 x i32> undef, i32 %24, i32 0
%65 = insertelement <2 x i32> %64, i32 %25, i32 1
%66 = bitcast <2 x i32> %65 to i64
%67 = inttoptr i64 %66 to [12 x <4 x i32>] addrspace(2)*
%68 = insertelement <2 x i32> undef, i32 %28, i32 0
%69 = insertelement <2 x i32> %68, i32 %29, i32 1
%70 = bitcast <2 x i32> %69 to i64
%71 = inttoptr i64 %70 to [0 x <8 x i32>] addrspace(2)*
%72 = insertelement <2 x i32> undef, i32 %32, i32 0
%73 = insertelement <2 x i32> %72, i32 %33, i32 1
%74 = bitcast <2 x i32> %73 to i64
%75 = inttoptr i64 %74 to [0 x <4 x i32>] addrspace(2)*
%76 = insertelement <2 x i32> undef, i32 %36, i32 0
%77 = insertelement <2 x i32> %76, i32 %37, i32 1
%78 = bitcast <2 x i32> %77 to i64
%79 = inttoptr i64 %78 to [80 x <8 x i32>] addrspace(2)*
; --- Rebuild the scalar/vector arguments in the types @main expects. ---
%80 = bitcast i32 %38 to float
%81 = insertelement <2 x float> undef, float %40, i32 0
%82 = insertelement <2 x float> %81, float %41, i32 1
%83 = bitcast <2 x float> %82 to <2 x i32>
%84 = insertelement <2 x float> undef, float %43, i32 0
%85 = insertelement <2 x float> %84, float %44, i32 1
%86 = bitcast <2 x float> %85 to <2 x i32>
%87 = insertelement <2 x float> undef, float %46, i32 0
%88 = insertelement <2 x float> %87, float %47, i32 1
%89 = bitcast <2 x float> %88 to <2 x i32>
%90 = insertelement <3 x float> undef, float %49, i32 0
%91 = insertelement <3 x float> %90, float %50, i32 1
%92 = insertelement <3 x float> %91, float %51, i32 2
%93 = bitcast <3 x float> %92 to <3 x i32>
%94 = insertelement <2 x float> undef, float %53, i32 0
%95 = insertelement <2 x float> %94, float %54, i32 1
%96 = bitcast <2 x float> %95 to <2 x i32>
%97 = insertelement <2 x float> undef, float %56, i32 0
%98 = insertelement <2 x float> %97, float %57, i32 1
%99 = bitcast <2 x float> %98 to <2 x i32>
%100 = insertelement <2 x float> undef, float %59, i32 0
%101 = insertelement <2 x float> %100, float %60, i32 1
%102 = bitcast <2 x float> %101 to <2 x i32>
%103 = bitcast float %61 to i32
%104 = bitcast float %62 to i32
%105 = bitcast float %63 to i32
; --- Call the main shader part. %5, %13-%17 and %20 are passed through
; unchanged; everything else is the rebuilt value from above. ---
%106 = call <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([12 x <4 x i32>] addrspace(2)* %67, [0 x <8 x i32>] addrspace(2)* %71, [0 x <4 x i32>] addrspace(2)* %75, [80 x <8 x i32>] addrspace(2)* %79, float %80, i32 %5, <2 x i32> %83, <2 x i32> %86, <2 x i32> %89, <3 x i32> %93, <2 x i32> %96, <2 x i32> %99, <2 x i32> %102, float %13, float %14, float %15, float %16, float %17, i32 %103, i32 %104, float %20, i32 %105)
; --- Unpack all 24 fields of the returned struct (9 x i32, 15 x float). ---
%107 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 0
%108 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 1
%109 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 2
%110 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 3
%111 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 4
%112 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 5
%113 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 6
%114 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 7
%115 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 8
%116 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 9
%117 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 10
%118 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 11
%119 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 12
%120 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 13
%121 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 14
%122 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 15
%123 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 16
%124 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 17
%125 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 18
%126 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 19
%127 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 20
%128 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 21
%129 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 22
%130 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 23
; --- Pack fields 0-7 pairwise into four i64 values for @ps_epilog. ---
%131 = insertelement <2 x i32> undef, i32 %107, i32 0
%132 = insertelement <2 x i32> %131, i32 %108, i32 1
%133 = bitcast <2 x i32> %132 to i64
%134 = insertelement <2 x i32> undef, i32 %109, i32 0
%135 = insertelement <2 x i32> %134, i32 %110, i32 1
%136 = bitcast <2 x i32> %135 to i64
%137 = insertelement <2 x i32> undef, i32 %111, i32 0
%138 = insertelement <2 x i32> %137, i32 %112, i32 1
%139 = bitcast <2 x i32> %138 to i64
%140 = insertelement <2 x i32> undef, i32 %113, i32 0
%141 = insertelement <2 x i32> %140, i32 %114, i32 1
%142 = bitcast <2 x i32> %141 to i64
; Field 8 (i32) becomes the epilog's float inreg argument.
%143 = bitcast i32 %115 to float
; Fields 9-23 are forwarded as the fifteen float arguments.
call void @ps_epilog(i64 %133, i64 %136, i64 %139, i64 %142, float %143, float %116, float %117, float %118, float %119, float %120, float %121, float %122, float %123, float %124, float %125, float %126, float %127, float %128, float %129, float %130)
ret void
}
; Attribute groups referenced by the definitions, declarations and call
; sites of this module. #0/#4/#6 are on the function definitions, #1-#3 and
; #5 on the intrinsic declarations; #7 and #8 appear only on call sites in
; @main (#7 on image.load and interp.mov calls, #8 on image.store calls).
attributes #0 = { alwaysinline "no-signed-zeros-fp-math"="true" }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind writeonly }
attributes #3 = { nounwind readnone speculatable }
attributes #4 = { alwaysinline "InitialPSInputAddr"="16777215" "no-signed-zeros-fp-math"="true" }
attributes #5 = { nounwind }
attributes #6 = { "no-signed-zeros-fp-math"="true" }
attributes #7 = { nounwind readnone }
attributes #8 = { inaccessiblememonly nounwind }
; Empty metadata node; used as the operand of !amdgpu.uniform and
; !invariant.load on the descriptor address computations and loads.
!0 = !{}
radeonsi: Compiling shader 8
SHADER KEY
part.ps.prolog.color_two_side = 0
part.ps.prolog.flatshade_colors = 0
part.ps.prolog.poly_stipple = 0
part.ps.prolog.force_persp_sample_interp = 0
part.ps.prolog.force_linear_sample_interp = 0
part.ps.prolog.force_persp_center_interp = 0
part.ps.prolog.force_linear_center_interp = 0
part.ps.prolog.bc_optimize_for_persp = 0
part.ps.prolog.bc_optimize_for_linear = 0
part.ps.epilog.spi_shader_col_format = 0x1
part.ps.epilog.color_is_int8 = 0x0
part.ps.epilog.color_is_int10 = 0x0
part.ps.epilog.last_cbuf = 0
part.ps.epilog.alpha_func = 7
part.ps.epilog.alpha_to_one = 0
part.ps.epilog.poly_line_smoothing = 0
part.ps.epilog.clamp_color = 0
Pixel Shader:
Shader main disassembly:
; Disassembly of the compiled shader. Register roles as used below:
;   s[20:27] = 8-dword descriptor used by both image_store instructions
;   s[12:19] = 8-dword descriptor used by image_load
;   s0       = loop counter (starts at 0, +1 per iteration)
;   s1       = odd store coordinate (starts at 1, +2 per iteration)
;   v0       = constant load coordinate 16
wrapper:
BB0_0:
; Fetch the two descriptors from byte offsets 0x1c0 (= 14*32) and
; 0x1e0 (= 15*32) relative to the pointer in s[6:7].
s_load_dwordx8 s[20:27], s[6:7], 0x1c0 ; C00E0503 000001C0
s_load_dwordx8 s[12:19], s[6:7], 0x1e0 ; C00E0303 000001E0
s_mov_b32 s0, 0 ; BE800080
s_mov_b32 s1, 1 ; BE810081
v_mov_b32_e32 v0, 16 ; 7E000290
; Wait for the scalar loads before touching the descriptor registers.
s_waitcnt lgkmcnt(0) ; BF8C007F
; Clear bit 21 (mask 0xffdfffff) of the 7th dword of the store descriptor.
s_and_b32 s26, s26, 0xffdfffff ; 861AFF1A FFDFFFFF
; Skip the loop entirely if the counter is already > 0x2ff (767).
s_cmpk_gt_i32 s0, 0x2ff ; B20002FF
s_cbranch_scc1 BB0_2 ; BF850000
BB0_1:
; Loop body. Note there is a single image_load per iteration; both stores
; below are computed from this one result (v[1:4]).
image_load v[1:4], v0, s[12:19] dmask:0xf unorm ; F0001F00 00030100
; s2 = s1 - 1 is the even coordinate; v9 = odd coord, v10 = even coord.
s_add_i32 s2, s1, -1 ; 8102C101
v_mov_b32_e32 v9, s1 ; 7E120201
v_mov_b32_e32 v10, s2 ; 7E140202
s_add_i32 s0, s0, 1 ; 81008100
s_add_i32 s1, s1, 2 ; 81018201
; Wait for the image_load result.
s_waitcnt vmcnt(0) ; BF8C0F70
; v[5:8] = loaded value + 1 (per component, integer add).
v_add_u32_e32 v5, vcc, 1, v1 ; 320A0281
v_add_u32_e32 v6, vcc, 1, v2 ; 320C0481
v_add_u32_e32 v7, vcc, 1, v3 ; 320E0681
v_add_u32_e32 v8, vcc, 1, v4 ; 32100881
; v[1:4] = loaded value - 1 (per component, in place).
v_add_u32_e32 v1, vcc, -1, v1 ; 320202C1
v_add_u32_e32 v2, vcc, -1, v2 ; 320404C1
v_add_u32_e32 v3, vcc, -1, v3 ; 320606C1
v_add_u32_e32 v4, vcc, -1, v4 ; 320808C1
; Store +1 values at the even coordinate, then -1 values at the odd one.
image_store v[5:8], v10, s[20:27] dmask:0xf unorm ; F0201F00 0005050A
image_store v[1:4], v9, s[20:27] dmask:0xf unorm ; F0201F00 00050109
; Repeat while the counter is <= 0x2ff (767).
s_cmpk_gt_i32 s0, 0x2ff ; B20002FF
s_cbranch_scc0 BB0_1 ; BF840000
BB0_2:
; After the loop: set m0 from s9, interpolate attr0.x and export it
; as the only enabled channel of mrt0, then end the program.
s_mov_b32 m0, s9 ; BEFC0009
v_interp_mov_f32 v0, p0, attr0.x ; D4020002
exp mrt0 v0, off, off, off done vm ; C4001801 00000000
s_endpgm ; BF810000
*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0x0001
SPI_PS_INPUT_ENA = 0x0001
*** SHADER STATS ***
SGPRS: 96
VGPRS: 12
Spilled SGPRs: 0
Spilled VGPRs: 0
Private memory VGPRs: 0
Code Size: 156 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 8
********************
-------------- next part --------------
; ModuleID = 'tgsi'
source_filename = "tgsi"
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"
; Function Attrs: alwaysinline
; Main shader part (unoptimized IR). All parameters are unnamed (%0-%21);
; the body reads only %3 (descriptor array pointer), %4, %5 and %20.
;
; Every TEMPn.c / OUT0.c slot is a float alloca; integer values are kept in
; them as raw bits via bitcast. TEMP0.x is the loop counter i (starts at 0).
;
; Loop, executed while i < 768:
;   1. image.load at coordinate 16 through descriptor element 15 of %3,
;      add 1 to each of the 4 components (integer add on the bits),
;      image.store the result at coordinate 2*i through descriptor element 14.
;   2. image.load again (same coordinate, same descriptor), subtract 1 from
;      each component, image.store at coordinate 2*i + 1 through element 14.
;   3. i = i + 1.
; After the loop the function fills OUT0 from interp.mov results and returns
; a 24-field packed struct in which only fields 8, 9-12 and 23 are set.
define private amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([12 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
; One float stack slot per component of TEMP0..TEMP11 and OUT0.
%TEMP11.w = alloca float
%TEMP11.z = alloca float
%TEMP11.y = alloca float
%TEMP11.x = alloca float
%TEMP10.w = alloca float
%TEMP10.z = alloca float
%TEMP10.y = alloca float
%TEMP10.x = alloca float
%TEMP9.w = alloca float
%TEMP9.z = alloca float
%TEMP9.y = alloca float
%TEMP9.x = alloca float
%TEMP8.w = alloca float
%TEMP8.z = alloca float
%TEMP8.y = alloca float
%TEMP8.x = alloca float
%TEMP7.w = alloca float
%TEMP7.z = alloca float
%TEMP7.y = alloca float
%TEMP7.x = alloca float
%TEMP6.w = alloca float
%TEMP6.z = alloca float
%TEMP6.y = alloca float
%TEMP6.x = alloca float
%TEMP5.w = alloca float
%TEMP5.z = alloca float
%TEMP5.y = alloca float
%TEMP5.x = alloca float
%TEMP4.w = alloca float
%TEMP4.z = alloca float
%TEMP4.y = alloca float
%TEMP4.x = alloca float
%TEMP3.w = alloca float
%TEMP3.z = alloca float
%TEMP3.y = alloca float
%TEMP3.x = alloca float
%TEMP2.w = alloca float
%TEMP2.z = alloca float
%TEMP2.y = alloca float
%TEMP2.x = alloca float
%TEMP1.w = alloca float
%TEMP1.z = alloca float
%TEMP1.y = alloca float
%TEMP1.x = alloca float
%TEMP0.w = alloca float
%TEMP0.z = alloca float
%TEMP0.y = alloca float
%TEMP0.x = alloca float
%OUT0.w = alloca float
%OUT0.z = alloca float
%OUT0.y = alloca float
%OUT0.x = alloca float
; i = 0 (all-zero bits, so the float 0.0 also reads back as integer 0).
store float 0.000000e+00, float* %TEMP0.x
br label %loop1
loop1: ; preds = %endif5, %main_body
; Loop header: TEMP1.x = (i >= 768) ? -1 : 0 (signed compare, sign-extended),
; then leave the loop when that value is non-zero.
%22 = load float, float* %TEMP0.x
%23 = bitcast float %22 to i32
%24 = icmp sge i32 %23, 768
%25 = sext i1 %24 to i32
%26 = bitcast i32 %25 to float
store float %26, float* %TEMP1.x
%27 = load float, float* %TEMP1.x
%28 = bitcast float %27 to i32
%29 = icmp ne i32 %28, 0
br i1 %29, label %if3, label %endif5
if3: ; preds = %loop1
br label %endloop19
endif5: ; preds = %loop1
; --- First half: load, +1, store at coordinate 2*i. ---
; 0x36E0000000000000 is 2^-145, i.e. the float whose bit pattern is the
; integer 16 -- the same value passed as the load coordinate below.
store float 0x36E0000000000000, float* %TEMP2.x
; Descriptor for the load: element 15 of the array pointed to by %3.
%30 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %3, i32 0, i32 15, !amdgpu.uniform !0
%31 = load <8 x i32>, <8 x i32> addrspace(2)* %30, !invariant.load !0
%32 = call nsz <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 16, <8 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false) #1
; TEMP3 = loaded texel (4 components).
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
store float %33, float* %TEMP3.x
store float %34, float* %TEMP3.y
store float %35, float* %TEMP3.z
store float %36, float* %TEMP3.w
; TEMP4.x = 2*i (store coordinate); also copied to TEMP5.x.
%37 = load float, float* %TEMP0.x
%38 = bitcast float %37 to i32
%39 = mul i32 2, %38
%40 = bitcast i32 %39 to float
store float %40, float* %TEMP4.x
%41 = load float, float* %TEMP4.x
store float %41, float* %TEMP5.x
; TEMP6 = TEMP3 + 1, component-wise, as integer adds on the raw bits.
%42 = load float, float* %TEMP3.x
%43 = bitcast float %42 to i32
%44 = add i32 %43, 1
%45 = load float, float* %TEMP3.y
%46 = bitcast float %45 to i32
%47 = add i32 %46, 1
%48 = load float, float* %TEMP3.z
%49 = bitcast float %48 to i32
%50 = add i32 %49, 1
%51 = load float, float* %TEMP3.w
%52 = bitcast float %51 to i32
%53 = add i32 %52, 1
%54 = bitcast i32 %44 to float
store float %54, float* %TEMP6.x
%55 = bitcast i32 %47 to float
store float %55, float* %TEMP6.y
%56 = bitcast i32 %50 to float
store float %56, float* %TEMP6.z
%57 = bitcast i32 %53 to float
store float %57, float* %TEMP6.w
; Gather TEMP6 into the <4 x float> data operand of the store.
%58 = load float, float* %TEMP6.x
%59 = load float, float* %TEMP6.y
%60 = load float, float* %TEMP6.z
%61 = load float, float* %TEMP6.w
%62 = insertelement <4 x float> undef, float %58, i32 0
%63 = insertelement <4 x float> %62, float %59, i32 1
%64 = insertelement <4 x float> %63, float %60, i32 2
%65 = insertelement <4 x float> %64, float %61, i32 3
; Descriptor for the store: element 14 of %3, with bit 21 of dword 6
; cleared (-2097153 == 0xFFDFFFFF).
%66 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %3, i32 0, i32 14, !amdgpu.uniform !0
%67 = load <8 x i32>, <8 x i32> addrspace(2)* %66, !invariant.load !0
%68 = extractelement <8 x i32> %67, i32 6
%69 = and i32 %68, -2097153
%70 = insertelement <8 x i32> %67, i32 %69, i32 6
%71 = load float, float* %TEMP4.x
%72 = bitcast float %71 to i32
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %65, i32 %72, <8 x i32> %70, i32 15, i1 false, i1 false, i1 false, i1 false) #2
; --- Second half: load again, -1, store at coordinate 2*i + 1. ---
; This second image.load has the same operands as the first and is
; separated from it only by the image.store above.
store float 0x36E0000000000000, float* %TEMP7.x
%73 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %3, i32 0, i32 15, !amdgpu.uniform !0
%74 = load <8 x i32>, <8 x i32> addrspace(2)* %73, !invariant.load !0
%75 = call nsz <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 16, <8 x i32> %74, i32 15, i1 false, i1 false, i1 false, i1 false) #1
; TEMP8 = reloaded texel.
%76 = extractelement <4 x float> %75, i32 0
%77 = extractelement <4 x float> %75, i32 1
%78 = extractelement <4 x float> %75, i32 2
%79 = extractelement <4 x float> %75, i32 3
store float %76, float* %TEMP8.x
store float %77, float* %TEMP8.y
store float %78, float* %TEMP8.z
store float %79, float* %TEMP8.w
; TEMP9.x = 2*i + 1 (store coordinate); also copied to TEMP10.x.
%80 = load float, float* %TEMP0.x
%81 = bitcast float %80 to i32
%82 = mul i32 2, %81
%83 = add i32 %82, 1
%84 = bitcast i32 %83 to float
store float %84, float* %TEMP9.x
%85 = load float, float* %TEMP9.x
store float %85, float* %TEMP10.x
; TEMP11 = TEMP8 - 1, component-wise, as integer adds of -1.
%86 = load float, float* %TEMP8.x
%87 = bitcast float %86 to i32
%88 = add i32 %87, -1
%89 = load float, float* %TEMP8.y
%90 = bitcast float %89 to i32
%91 = add i32 %90, -1
%92 = load float, float* %TEMP8.z
%93 = bitcast float %92 to i32
%94 = add i32 %93, -1
%95 = load float, float* %TEMP8.w
%96 = bitcast float %95 to i32
%97 = add i32 %96, -1
%98 = bitcast i32 %88 to float
store float %98, float* %TEMP11.x
%99 = bitcast i32 %91 to float
store float %99, float* %TEMP11.y
%100 = bitcast i32 %94 to float
store float %100, float* %TEMP11.z
%101 = bitcast i32 %97 to float
store float %101, float* %TEMP11.w
; Gather TEMP11 into the data operand of the second store.
%102 = load float, float* %TEMP11.x
%103 = load float, float* %TEMP11.y
%104 = load float, float* %TEMP11.z
%105 = load float, float* %TEMP11.w
%106 = insertelement <4 x float> undef, float %102, i32 0
%107 = insertelement <4 x float> %106, float %103, i32 1
%108 = insertelement <4 x float> %107, float %104, i32 2
%109 = insertelement <4 x float> %108, float %105, i32 3
; Same store descriptor as before (element 14, bit 21 of dword 6 cleared).
%110 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %3, i32 0, i32 14, !amdgpu.uniform !0
%111 = load <8 x i32>, <8 x i32> addrspace(2)* %110, !invariant.load !0
%112 = extractelement <8 x i32> %111, i32 6
%113 = and i32 %112, -2097153
%114 = insertelement <8 x i32> %111, i32 %113, i32 6
%115 = load float, float* %TEMP9.x
%116 = bitcast float %115 to i32
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %109, i32 %116, <8 x i32> %114, i32 15, i1 false, i1 false, i1 false, i1 false) #2
; i = i + 1, then back to the loop header.
%117 = load float, float* %TEMP0.x
%118 = bitcast float %117 to i32
%119 = add i32 %118, 1
%120 = bitcast i32 %119 to float
store float %120, float* %TEMP0.x
br label %loop1
endloop19: ; preds = %if3
; Sixteen interp.mov calls: the same four (second operand 0..3) repeated
; four times. Only %121, %126, %131 and %136 are used below.
%121 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #7
%122 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %5) #7
%123 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %5) #7
%124 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %5) #7
%125 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #7
%126 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %5) #7
%127 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %5) #7
%128 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %5) #7
%129 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #7
%130 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %5) #7
%131 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %5) #7
%132 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %5) #7
%133 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #7
%134 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %5) #7
%135 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %5) #7
%136 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %5) #7
; OUT0 = (x from 1st group, y from 2nd, z from 3rd, w from 4th).
store float %121, float* %OUT0.x
store float %126, float* %OUT0.y
store float %131, float* %OUT0.z
store float %136, float* %OUT0.w
%137 = load float, float* %OUT0.x
%138 = load float, float* %OUT0.y
%139 = load float, float* %OUT0.z
%140 = load float, float* %OUT0.w
; Build the return struct: field 8 = bits of %4, fields 9-12 = OUT0.xyzw,
; field 23 = %20. All other fields stay undef.
%141 = bitcast float %4 to i32
%142 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %141, 8
%143 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %142, float %137, 9
%144 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %143, float %138, 10
%145 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %144, float %139, 11
%146 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %145, float %140, 12
%147 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %146, float %20, 23
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %147
}
; Intrinsics called from @main: image load, image store and interp.mov.
; In this module the image.load / image.store call sites in @main use the
; same attribute groups as these declarations (#1 and #2); the interp.mov
; call sites use #7.
; Function Attrs: nounwind readonly
declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1
; Function Attrs: nounwind writeonly
declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #2
; Function Attrs: nounwind readnone speculatable
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #3
; Function Attrs: alwaysinline
; Pixel-shader epilog: issues one llvm.amdgcn.exp.f32 call and returns.
; Parameters are unnamed (%0-%19): four i64 inreg (%0-%3), one float inreg
; (%4), then fifteen plain floats (%5-%19). Only %5 is read here; the other
; three exported channels are undef.
define private amdgpu_ps void @ps_epilog(i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #4 {
main_body:
; NOTE(review): the leading i32 0 / i32 1 are presumably the export target
; and channel-enable mask, and the two i1 true flags "done"/"vm" -- confirm
; against the intrinsic documentation.
call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %5, float undef, float undef, float undef, i1 true, i1 true) #5
ret void
}
; Export intrinsic; called from @ps_epilog.
; Function Attrs: nounwind
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #5
; @wrapper: amdgpu_ps entry point that forwards its arguments to @main and
; hands @main's returned struct to @ps_epilog. It performs no memory access
; itself; everything below is bit-level repacking of arguments and results.
;   %0-%3   four addrspace(2) pointers (byval here)
;   %4, %5  float inreg / i32 inreg
;   %6-%12  <2 x i32> / <3 x i32> vectors
;   %13-%21 scalar float / i32 arguments
define amdgpu_ps void @wrapper([12 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #6 {
main_body:
; Split each of the four 64-bit pointers %0-%3 into two i32 halves.
%22 = ptrtoint [12 x <4 x i32>] addrspace(2)* %0 to i64
%23 = bitcast i64 %22 to <2 x i32>
%24 = extractelement <2 x i32> %23, i32 0
%25 = extractelement <2 x i32> %23, i32 1
%26 = ptrtoint [0 x <8 x i32>] addrspace(2)* %1 to i64
%27 = bitcast i64 %26 to <2 x i32>
%28 = extractelement <2 x i32> %27, i32 0
%29 = extractelement <2 x i32> %27, i32 1
%30 = ptrtoint [0 x <4 x i32>] addrspace(2)* %2 to i64
%31 = bitcast i64 %30 to <2 x i32>
%32 = extractelement <2 x i32> %31, i32 0
%33 = extractelement <2 x i32> %31, i32 1
%34 = ptrtoint [80 x <8 x i32>] addrspace(2)* %3 to i64
%35 = bitcast i64 %34 to <2 x i32>
%36 = extractelement <2 x i32> %35, i32 0
%37 = extractelement <2 x i32> %35, i32 1
; Reinterpret the remaining arguments as scalars: %4 as i32, the integer
; vectors %6-%12 as individual floats, and the i32 arguments %18, %19, %21
; as floats. All of these are bitcasts, so no bits change.
%38 = bitcast float %4 to i32
%39 = bitcast <2 x i32> %6 to <2 x float>
%40 = extractelement <2 x float> %39, i32 0
%41 = extractelement <2 x float> %39, i32 1
%42 = bitcast <2 x i32> %7 to <2 x float>
%43 = extractelement <2 x float> %42, i32 0
%44 = extractelement <2 x float> %42, i32 1
%45 = bitcast <2 x i32> %8 to <2 x float>
%46 = extractelement <2 x float> %45, i32 0
%47 = extractelement <2 x float> %45, i32 1
%48 = bitcast <3 x i32> %9 to <3 x float>
%49 = extractelement <3 x float> %48, i32 0
%50 = extractelement <3 x float> %48, i32 1
%51 = extractelement <3 x float> %48, i32 2
%52 = bitcast <2 x i32> %10 to <2 x float>
%53 = extractelement <2 x float> %52, i32 0
%54 = extractelement <2 x float> %52, i32 1
%55 = bitcast <2 x i32> %11 to <2 x float>
%56 = extractelement <2 x float> %55, i32 0
%57 = extractelement <2 x float> %55, i32 1
%58 = bitcast <2 x i32> %12 to <2 x float>
%59 = extractelement <2 x float> %58, i32 0
%60 = extractelement <2 x float> %58, i32 1
%61 = bitcast i32 %18 to float
%62 = bitcast i32 %19 to float
%63 = bitcast i32 %21 to float
; Reassemble the i32 halves back into the four pointers (inverse of the
; split above).
%64 = insertelement <2 x i32> undef, i32 %24, i32 0
%65 = insertelement <2 x i32> %64, i32 %25, i32 1
%66 = bitcast <2 x i32> %65 to i64
%67 = inttoptr i64 %66 to [12 x <4 x i32>] addrspace(2)*
%68 = insertelement <2 x i32> undef, i32 %28, i32 0
%69 = insertelement <2 x i32> %68, i32 %29, i32 1
%70 = bitcast <2 x i32> %69 to i64
%71 = inttoptr i64 %70 to [0 x <8 x i32>] addrspace(2)*
%72 = insertelement <2 x i32> undef, i32 %32, i32 0
%73 = insertelement <2 x i32> %72, i32 %33, i32 1
%74 = bitcast <2 x i32> %73 to i64
%75 = inttoptr i64 %74 to [0 x <4 x i32>] addrspace(2)*
%76 = insertelement <2 x i32> undef, i32 %36, i32 0
%77 = insertelement <2 x i32> %76, i32 %37, i32 1
%78 = bitcast <2 x i32> %77 to i64
%79 = inttoptr i64 %78 to [80 x <8 x i32>] addrspace(2)*
; Rebuild the float and vector arguments in the types @main expects
; (inverse of the scalarisation above).
%80 = bitcast i32 %38 to float
%81 = insertelement <2 x float> undef, float %40, i32 0
%82 = insertelement <2 x float> %81, float %41, i32 1
%83 = bitcast <2 x float> %82 to <2 x i32>
%84 = insertelement <2 x float> undef, float %43, i32 0
%85 = insertelement <2 x float> %84, float %44, i32 1
%86 = bitcast <2 x float> %85 to <2 x i32>
%87 = insertelement <2 x float> undef, float %46, i32 0
%88 = insertelement <2 x float> %87, float %47, i32 1
%89 = bitcast <2 x float> %88 to <2 x i32>
%90 = insertelement <3 x float> undef, float %49, i32 0
%91 = insertelement <3 x float> %90, float %50, i32 1
%92 = insertelement <3 x float> %91, float %51, i32 2
%93 = bitcast <3 x float> %92 to <3 x i32>
%94 = insertelement <2 x float> undef, float %53, i32 0
%95 = insertelement <2 x float> %94, float %54, i32 1
%96 = bitcast <2 x float> %95 to <2 x i32>
%97 = insertelement <2 x float> undef, float %56, i32 0
%98 = insertelement <2 x float> %97, float %57, i32 1
%99 = bitcast <2 x float> %98 to <2 x i32>
%100 = insertelement <2 x float> undef, float %59, i32 0
%101 = insertelement <2 x float> %100, float %60, i32 1
%102 = bitcast <2 x float> %101 to <2 x i32>
%103 = bitcast float %61 to i32
%104 = bitcast float %62 to i32
%105 = bitcast float %63 to i32
; Call the shader body. %5, %13-%17 and %20 are passed through unchanged;
; every other operand is one of the repacked values from above.
%106 = call <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([12 x <4 x i32>] addrspace(2)* %67, [0 x <8 x i32>] addrspace(2)* %71, [0 x <4 x i32>] addrspace(2)* %75, [80 x <8 x i32>] addrspace(2)* %79, float %80, i32 %5, <2 x i32> %83, <2 x i32> %86, <2 x i32> %89, <3 x i32> %93, <2 x i32> %96, <2 x i32> %99, <2 x i32> %102, float %13, float %14, float %15, float %16, float %17, i32 %103, i32 %104, float %20, i32 %105)
; Unpack all 24 fields (indices 0-23) of the packed struct returned by @main.
%107 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 0
%108 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 1
%109 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 2
%110 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 3
%111 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 4
%112 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 5
%113 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 6
%114 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 7
%115 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 8
%116 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 9
%117 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 10
%118 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 11
%119 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 12
%120 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 13
%121 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 14
%122 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 15
%123 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 16
%124 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 17
%125 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 18
%126 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 19
%127 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 20
%128 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 21
%129 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 22
%130 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 23
; Pack struct fields 0-7 pairwise into four i64 values and reinterpret
; field 8 as float, matching @ps_epilog's leading parameters.
%131 = insertelement <2 x i32> undef, i32 %107, i32 0
%132 = insertelement <2 x i32> %131, i32 %108, i32 1
%133 = bitcast <2 x i32> %132 to i64
%134 = insertelement <2 x i32> undef, i32 %109, i32 0
%135 = insertelement <2 x i32> %134, i32 %110, i32 1
%136 = bitcast <2 x i32> %135 to i64
%137 = insertelement <2 x i32> undef, i32 %111, i32 0
%138 = insertelement <2 x i32> %137, i32 %112, i32 1
%139 = bitcast <2 x i32> %138 to i64
%140 = insertelement <2 x i32> undef, i32 %113, i32 0
%141 = insertelement <2 x i32> %140, i32 %114, i32 1
%142 = bitcast <2 x i32> %141 to i64
%143 = bitcast i32 %115 to float
; Hand the packed values and struct fields 9-23 to the epilog.
call void @ps_epilog(i64 %133, i64 %136, i64 %139, i64 %142, float %143, float %116, float %117, float %118, float %119, float %120, float %121, float %122, float %123, float %124, float %125, float %126, float %127, float %128, float %129, float %130)
ret void
}
; Attribute groups referenced through the #N suffixes on the definitions,
; declarations and call sites of this module, followed by the empty
; metadata node !0.
attributes #0 = { alwaysinline "no-signed-zeros-fp-math"="true" }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind writeonly }
attributes #3 = { nounwind readnone speculatable }
attributes #4 = { alwaysinline "InitialPSInputAddr"="16777215" "no-signed-zeros-fp-math"="true" }
attributes #5 = { nounwind }
attributes #6 = { "no-signed-zeros-fp-math"="true" }
attributes #7 = { nounwind readnone }
!0 = !{}
radeonsi: Compiling shader 6
SHADER KEY
part.ps.prolog.color_two_side = 0
part.ps.prolog.flatshade_colors = 0
part.ps.prolog.poly_stipple = 0
part.ps.prolog.force_persp_sample_interp = 0
part.ps.prolog.force_linear_sample_interp = 0
part.ps.prolog.force_persp_center_interp = 0
part.ps.prolog.force_linear_center_interp = 0
part.ps.prolog.bc_optimize_for_persp = 0
part.ps.prolog.bc_optimize_for_linear = 0
part.ps.epilog.spi_shader_col_format = 0x1
part.ps.epilog.color_is_int8 = 0x0
part.ps.epilog.color_is_int10 = 0x0
part.ps.epilog.last_cbuf = 0
part.ps.epilog.alpha_func = 7
part.ps.epilog.alpha_to_one = 0
part.ps.epilog.poly_line_smoothing = 0
part.ps.epilog.clamp_color = 0
Pixel Shader:
Shader main disassembly:
; Disassembly of the compiled pixel shader entry point. The trailing
; "; XXXXXXXX" text on each instruction line is the instruction encoding as
; printed by the disassembler.
; Register roles, as set up in BB0_0 and updated in BB0_1:
;   s[12:19]  8-dword descriptor used by both image_load instructions
;   s[20:27]  8-dword descriptor used by both image_store instructions
;   s0        loop counter, starts at 0, +1 per iteration
;   s1        starts at 1, +2 per iteration; s1-1 and s1 are the two store
;             coordinates of an iteration
;   v0        constant 16, the coordinate of every image_load
wrapper:
BB0_0:
; Offsets 0x1c0 and 0x1e0 are 14*32 and 15*32 bytes from the base in s[6:7].
s_load_dwordx8 s[20:27], s[6:7], 0x1c0 ; C00E0503 000001C0
s_load_dwordx8 s[12:19], s[6:7], 0x1e0 ; C00E0303 000001E0
s_mov_b32 s0, 0 ; BE800080
s_mov_b32 s1, 1 ; BE810081
v_mov_b32_e32 v0, 16 ; 7E000290
s_waitcnt lgkmcnt(0) ; BF8C007F
; 0xffdfffff has every bit set except bit 21, so this clears bit 21 of the
; seventh dword (s26) of the store descriptor.
s_and_b32 s26, s26, 0xffdfffff ; 861AFF1A FFDFFFFF
; Skip the loop entirely if s0 > 0x2ff (767).
s_cmpk_gt_i32 s0, 0x2ff ; B20002FF
s_cbranch_scc1 BB0_2 ; BF850000
BB0_1:
; First half of the iteration: load four components at coordinate v0, add 1
; to each, and store them at coordinate s1-1.
image_load v[1:4], v0, s[12:19] dmask:0xf unorm ; F0001F00 00030100
s_add_i32 s2, s1, -1 ; 8102C101
v_mov_b32_e32 v5, s2 ; 7E0A0202
s_add_i32 s0, s0, 1 ; 81008100
s_waitcnt vmcnt(0) ; BF8C0F70
v_add_u32_e32 v1, vcc, 1, v1 ; 32020281
v_add_u32_e32 v2, vcc, 1, v2 ; 32040481
v_add_u32_e32 v3, vcc, 1, v3 ; 32060681
v_add_u32_e32 v4, vcc, 1, v4 ; 32080881
image_store v[1:4], v5, s[20:27] dmask:0xf unorm ; F0201F00 00050105
; Only expcnt is waited on between the store above and the load below; the
; load then overwrites v[1:4], the registers that held the store data.
s_waitcnt expcnt(0) ; BF8C0F0F
; Second half: load again from the same coordinate and descriptor, add -1
; to each component, and store at coordinate s1 (read before s1 is bumped).
image_load v[1:4], v0, s[12:19] dmask:0xf unorm ; F0001F00 00030100
v_mov_b32_e32 v5, s1 ; 7E0A0201
s_add_i32 s1, s1, 2 ; 81018201
s_waitcnt vmcnt(0) ; BF8C0F70
v_add_u32_e32 v1, vcc, -1, v1 ; 320202C1
v_add_u32_e32 v2, vcc, -1, v2 ; 320404C1
v_add_u32_e32 v3, vcc, -1, v3 ; 320606C1
v_add_u32_e32 v4, vcc, -1, v4 ; 320808C1
image_store v[1:4], v5, s[20:27] dmask:0xf unorm ; F0201F00 00050105
; Loop back while s0 <= 0x2ff (767).
s_cmpk_gt_i32 s0, 0x2ff ; B20002FF
s_cbranch_scc0 BB0_1 ; BF840000
BB0_2:
; After the loop: set m0 from s9, fetch attr0.x into v0, export it to mrt0
; with the other three channels off, and end the program.
s_mov_b32 m0, s9 ; BEFC0009
v_interp_mov_f32 v0, p0, attr0.x ; D4020002
exp mrt0 v0, off, off, off done vm ; C4001801 00000000
s_endpgm ; BF810000
*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0x0001
SPI_PS_INPUT_ENA = 0x0001
*** SHADER STATS ***
SGPRS: 96
VGPRS: 8
Spilled SGPRs: 0
Spilled VGPRs: 0
Private memory VGPRs: 0
Code Size: 172 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 8
********************
FRAG
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
DCL IMAGE[0], 1D, PIPE_FORMAT_R32_UINT, WR
DCL IMAGE[1], 1D, PIPE_FORMAT_R32_UINT, WR
DCL TEMP[0..11], LOCAL
IMM[0] INT32 {0, 768, 16, 2}
IMM[1] UINT32 {1, 4294967295, 0, 0}
IMM[2] INT32 {1, 0, 0, 0}
0: MOV TEMP[0].x, IMM[0].xxxx
1: BGNLOOP
2: ISGE TEMP[1].x, TEMP[0].xxxx, IMM[0].yyyy
3: UIF TEMP[1].xxxx
4: BRK
5: ENDIF
6: MOV TEMP[2].x, IMM[0].zzzz
7: LOAD TEMP[3], IMAGE[0], IMM[0].zzzz, RESTRICT, 1D, PIPE_FORMAT_R32_UINT
8: UMUL TEMP[4].x, IMM[0].wwww, TEMP[0].xxxx
9: MOV TEMP[5].x, TEMP[4].xxxx
10: UADD TEMP[6], TEMP[3], IMM[1].xxxx
11: STORE IMAGE[1], TEMP[4].xxxx, TEMP[6], RESTRICT, 1D, PIPE_FORMAT_R32_UINT
12: MOV TEMP[7].x, IMM[0].zzzz
13: LOAD TEMP[8], IMAGE[0], IMM[0].zzzz, RESTRICT, 1D, PIPE_FORMAT_R32_UINT
14: UMAD TEMP[9].x, IMM[0].wwww, TEMP[0].xxxx, IMM[2].xxxx
15: MOV TEMP[10].x, TEMP[9].xxxx
16: UADD TEMP[11], TEMP[8], IMM[1].yyyy
17: STORE IMAGE[1], TEMP[9].xxxx, TEMP[11], RESTRICT, 1D, PIPE_FORMAT_R32_UINT
18: UADD TEMP[0].x, TEMP[0].xxxx, IMM[2].xxxx
19: ENDLOOP
20: MOV OUT[0], IN[0]
21: END
; ModuleID = 'tgsi'
source_filename = "tgsi"
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"
; Function Attrs: alwaysinline
; @main: unoptimised shader body. Every TEMP/OUT channel is a float alloca;
; integer values are kept in those slots as bit patterns and moved in and
; out with bitcasts. The function runs a loop with counter i in TEMP0.x,
; exiting once i >= 768. Each iteration
;   1. loads four components through the descriptor at %3[15], coordinate 16,
;      adds 1 to each, and stores them through the descriptor at %3[14] at
;      coordinate 2*i;
;   2. loads again from %3[15] at coordinate 16, adds -1 to each, and stores
;      through %3[14] at coordinate 2*i + 1.
; After the loop it fills OUT0 from interp.mov results and returns a packed
; struct carrying %4 (field 8), OUT0.xyzw (fields 9-12) and %20 (field 23).
; The image.load call sites carry attribute group #7 and the image.store call
; sites #8. NOTE(review): the attribute list of this module is cut off after
; #4 in the visible part of the dump, so the contents of #7 and #8 could not
; be confirmed here.
define private amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([12 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
; One float stack slot per channel of TEMP[0..11] and OUT[0].
%TEMP11.w = alloca float
%TEMP11.z = alloca float
%TEMP11.y = alloca float
%TEMP11.x = alloca float
%TEMP10.w = alloca float
%TEMP10.z = alloca float
%TEMP10.y = alloca float
%TEMP10.x = alloca float
%TEMP9.w = alloca float
%TEMP9.z = alloca float
%TEMP9.y = alloca float
%TEMP9.x = alloca float
%TEMP8.w = alloca float
%TEMP8.z = alloca float
%TEMP8.y = alloca float
%TEMP8.x = alloca float
%TEMP7.w = alloca float
%TEMP7.z = alloca float
%TEMP7.y = alloca float
%TEMP7.x = alloca float
%TEMP6.w = alloca float
%TEMP6.z = alloca float
%TEMP6.y = alloca float
%TEMP6.x = alloca float
%TEMP5.w = alloca float
%TEMP5.z = alloca float
%TEMP5.y = alloca float
%TEMP5.x = alloca float
%TEMP4.w = alloca float
%TEMP4.z = alloca float
%TEMP4.y = alloca float
%TEMP4.x = alloca float
%TEMP3.w = alloca float
%TEMP3.z = alloca float
%TEMP3.y = alloca float
%TEMP3.x = alloca float
%TEMP2.w = alloca float
%TEMP2.z = alloca float
%TEMP2.y = alloca float
%TEMP2.x = alloca float
%TEMP1.w = alloca float
%TEMP1.z = alloca float
%TEMP1.y = alloca float
%TEMP1.x = alloca float
%TEMP0.w = alloca float
%TEMP0.z = alloca float
%TEMP0.y = alloca float
%TEMP0.x = alloca float
%OUT0.w = alloca float
%OUT0.z = alloca float
%OUT0.y = alloca float
%OUT0.x = alloca float
; Loop counter i = 0 (0.0 has the all-zero bit pattern).
store float 0.000000e+00, float* %TEMP0.x
br label %loop1
loop1: ; preds = %endif5, %main_body
; TEMP1.x = (i >= 768) ? -1 : 0 (signed compare, sign-extended), then branch
; to the loop exit when it is non-zero.
%22 = load float, float* %TEMP0.x
%23 = bitcast float %22 to i32
%24 = icmp sge i32 %23, 768
%25 = sext i1 %24 to i32
%26 = bitcast i32 %25 to float
store float %26, float* %TEMP1.x
%27 = load float, float* %TEMP1.x
%28 = bitcast float %27 to i32
%29 = icmp ne i32 %28, 0
br i1 %29, label %if3, label %endif5
if3: ; preds = %loop1
br label %endloop19
endif5: ; preds = %loop1
; 0x36E0000000000000 is 2^-145, i.e. the single-precision denormal whose
; bit pattern is the integer 16.
store float 0x36E0000000000000, float* %TEMP2.x
; First load: descriptor = element 15 of %3, coordinate 16, dmask 15.
%30 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %3, i32 0, i32 15, !amdgpu.uniform !0
%31 = load <8 x i32>, <8 x i32> addrspace(2)* %30, !invariant.load !0
%32 = call nsz <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 16, <8 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false) #7
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
store float %33, float* %TEMP3.x
store float %34, float* %TEMP3.y
store float %35, float* %TEMP3.z
store float %36, float* %TEMP3.w
; TEMP4.x = 2 * i (first store coordinate); TEMP5.x receives a copy.
%37 = load float, float* %TEMP0.x
%38 = bitcast float %37 to i32
%39 = mul i32 2, %38
%40 = bitcast i32 %39 to float
store float %40, float* %TEMP4.x
%41 = load float, float* %TEMP4.x
store float %41, float* %TEMP5.x
; TEMP6 = TEMP3 + 1, component-wise as i32.
%42 = load float, float* %TEMP3.x
%43 = bitcast float %42 to i32
%44 = add i32 %43, 1
%45 = load float, float* %TEMP3.y
%46 = bitcast float %45 to i32
%47 = add i32 %46, 1
%48 = load float, float* %TEMP3.z
%49 = bitcast float %48 to i32
%50 = add i32 %49, 1
%51 = load float, float* %TEMP3.w
%52 = bitcast float %51 to i32
%53 = add i32 %52, 1
%54 = bitcast i32 %44 to float
store float %54, float* %TEMP6.x
%55 = bitcast i32 %47 to float
store float %55, float* %TEMP6.y
%56 = bitcast i32 %50 to float
store float %56, float* %TEMP6.z
%57 = bitcast i32 %53 to float
store float %57, float* %TEMP6.w
; Gather TEMP6 into the <4 x float> data operand of the store.
%58 = load float, float* %TEMP6.x
%59 = load float, float* %TEMP6.y
%60 = load float, float* %TEMP6.z
%61 = load float, float* %TEMP6.w
%62 = insertelement <4 x float> undef, float %58, i32 0
%63 = insertelement <4 x float> %62, float %59, i32 1
%64 = insertelement <4 x float> %63, float %60, i32 2
%65 = insertelement <4 x float> %64, float %61, i32 3
; Store descriptor = element 14 of %3 with bit 21 of dword 6 cleared
; (-2097153 is ~0x200000).
%66 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %3, i32 0, i32 14, !amdgpu.uniform !0
%67 = load <8 x i32>, <8 x i32> addrspace(2)* %66, !invariant.load !0
%68 = extractelement <8 x i32> %67, i32 6
%69 = and i32 %68, -2097153
%70 = insertelement <8 x i32> %67, i32 %69, i32 6
%71 = load float, float* %TEMP4.x
%72 = bitcast float %71 to i32
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %65, i32 %72, <8 x i32> %70, i32 15, i1 false, i1 false, i1 false, i1 false) #8
; Second load: same descriptor element (15) and coordinate (16) as the
; first, issued after the store above.
store float 0x36E0000000000000, float* %TEMP7.x
%73 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %3, i32 0, i32 15, !amdgpu.uniform !0
%74 = load <8 x i32>, <8 x i32> addrspace(2)* %73, !invariant.load !0
%75 = call nsz <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 16, <8 x i32> %74, i32 15, i1 false, i1 false, i1 false, i1 false) #7
%76 = extractelement <4 x float> %75, i32 0
%77 = extractelement <4 x float> %75, i32 1
%78 = extractelement <4 x float> %75, i32 2
%79 = extractelement <4 x float> %75, i32 3
store float %76, float* %TEMP8.x
store float %77, float* %TEMP8.y
store float %78, float* %TEMP8.z
store float %79, float* %TEMP8.w
; TEMP9.x = 2 * i + 1 (second store coordinate); TEMP10.x receives a copy.
%80 = load float, float* %TEMP0.x
%81 = bitcast float %80 to i32
%82 = mul i32 2, %81
%83 = add i32 %82, 1
%84 = bitcast i32 %83 to float
store float %84, float* %TEMP9.x
%85 = load float, float* %TEMP9.x
store float %85, float* %TEMP10.x
; TEMP11 = TEMP8 + (-1), component-wise as i32.
%86 = load float, float* %TEMP8.x
%87 = bitcast float %86 to i32
%88 = add i32 %87, -1
%89 = load float, float* %TEMP8.y
%90 = bitcast float %89 to i32
%91 = add i32 %90, -1
%92 = load float, float* %TEMP8.z
%93 = bitcast float %92 to i32
%94 = add i32 %93, -1
%95 = load float, float* %TEMP8.w
%96 = bitcast float %95 to i32
%97 = add i32 %96, -1
%98 = bitcast i32 %88 to float
store float %98, float* %TEMP11.x
%99 = bitcast i32 %91 to float
store float %99, float* %TEMP11.y
%100 = bitcast i32 %94 to float
store float %100, float* %TEMP11.z
%101 = bitcast i32 %97 to float
store float %101, float* %TEMP11.w
; Gather TEMP11 and store it through descriptor element 14 (bit 21 of dword
; 6 cleared again) at coordinate TEMP9.x.
%102 = load float, float* %TEMP11.x
%103 = load float, float* %TEMP11.y
%104 = load float, float* %TEMP11.z
%105 = load float, float* %TEMP11.w
%106 = insertelement <4 x float> undef, float %102, i32 0
%107 = insertelement <4 x float> %106, float %103, i32 1
%108 = insertelement <4 x float> %107, float %104, i32 2
%109 = insertelement <4 x float> %108, float %105, i32 3
%110 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %3, i32 0, i32 14, !amdgpu.uniform !0
%111 = load <8 x i32>, <8 x i32> addrspace(2)* %110, !invariant.load !0
%112 = extractelement <8 x i32> %111, i32 6
%113 = and i32 %112, -2097153
%114 = insertelement <8 x i32> %111, i32 %113, i32 6
%115 = load float, float* %TEMP9.x
%116 = bitcast float %115 to i32
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %109, i32 %116, <8 x i32> %114, i32 15, i1 false, i1 false, i1 false, i1 false) #8
; i = i + 1, then back to the loop header.
%117 = load float, float* %TEMP0.x
%118 = bitcast float %117 to i32
%119 = add i32 %118, 1
%120 = bitcast i32 %119 to float
store float %120, float* %TEMP0.x
br label %loop1
endloop19: ; preds = %if3
; Sixteen interp.mov calls are emitted (second operand 0-3, four times over),
; but only %121, %126, %131 and %136 are stored below; the other twelve
; results are unused in this function.
%121 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #7
%122 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %5) #7
%123 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %5) #7
%124 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %5) #7
%125 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #7
%126 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %5) #7
%127 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %5) #7
%128 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %5) #7
%129 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #7
%130 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %5) #7
%131 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %5) #7
%132 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %5) #7
%133 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #7
%134 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %5) #7
%135 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %5) #7
%136 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %5) #7
store float %121, float* %OUT0.x
store float %126, float* %OUT0.y
store float %131, float* %OUT0.z
store float %136, float* %OUT0.w
%137 = load float, float* %OUT0.x
%138 = load float, float* %OUT0.y
%139 = load float, float* %OUT0.z
%140 = load float, float* %OUT0.w
; Build the return struct: field 8 = bits of %4, fields 9-12 = OUT0.xyzw,
; field 23 = %20. All other fields stay undef.
%141 = bitcast float %4 to i32
%142 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %141, 8
%143 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %142, float %137, 9
%144 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %143, float %138, 10
%145 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %144, float %139, 11
%146 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %145, float %140, 12
%147 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %146, float %20, 23
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %147
}
; Declarations of the three AMDGPU intrinsics that @main calls. The
; declaration-level attribute groups are #1 (load), #2 (store) and #3
; (interp.mov); the call sites in @main attach their own groups (#7 / #8)
; in addition.
; Function Attrs: nounwind readonly
declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1
; Function Attrs: nounwind writeonly
declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #2
; Function Attrs: nounwind readnone speculatable
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #3
; Function Attrs: alwaysinline
; @ps_epilog: takes four i64 inreg values, one float inreg and fifteen
; floats, but only reads %5 (the first non-inreg float). It issues a single
; export call with %5 as the first data operand, undef for the other three,
; and both i1 flags set to true. All remaining parameters are ignored.
define private amdgpu_ps void @ps_epilog(i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #4 {
main_body:
call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %5, float undef, float undef, float undef, i1 true, i1 true) #5
ret void
}
; Declaration of the export intrinsic called by @ps_epilog: two i32 operands,
; four float data operands and two i1 flags, returning void.
; Function Attrs: nounwind
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #5
; @wrapper: amdgpu_ps entry point of this module. It repacks its incoming
; arguments, calls @main, unpacks the 24-field struct that @main returns and
; passes the pieces to @ps_epilog. Every conversion here is a bitcast,
; ptrtoint/inttoptr or element insert/extract, so the split-then-rejoin
; sequences are value-preserving round trips.
define amdgpu_ps void @wrapper([12 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #6 {
main_body:
; Pointers %0-%3: ptrtoint to i64, view as <2 x i32>, take both halves.
%22 = ptrtoint [12 x <4 x i32>] addrspace(2)* %0 to i64
%23 = bitcast i64 %22 to <2 x i32>
%24 = extractelement <2 x i32> %23, i32 0
%25 = extractelement <2 x i32> %23, i32 1
%26 = ptrtoint [0 x <8 x i32>] addrspace(2)* %1 to i64
%27 = bitcast i64 %26 to <2 x i32>
%28 = extractelement <2 x i32> %27, i32 0
%29 = extractelement <2 x i32> %27, i32 1
%30 = ptrtoint [0 x <4 x i32>] addrspace(2)* %2 to i64
%31 = bitcast i64 %30 to <2 x i32>
%32 = extractelement <2 x i32> %31, i32 0
%33 = extractelement <2 x i32> %31, i32 1
%34 = ptrtoint [80 x <8 x i32>] addrspace(2)* %3 to i64
%35 = bitcast i64 %34 to <2 x i32>
%36 = extractelement <2 x i32> %35, i32 0
%37 = extractelement <2 x i32> %35, i32 1
; Scalarise the rest: %4 viewed as i32, vectors %6-%12 viewed as floats and
; split per element, i32 arguments %18, %19, %21 viewed as float.
%38 = bitcast float %4 to i32
%39 = bitcast <2 x i32> %6 to <2 x float>
%40 = extractelement <2 x float> %39, i32 0
%41 = extractelement <2 x float> %39, i32 1
%42 = bitcast <2 x i32> %7 to <2 x float>
%43 = extractelement <2 x float> %42, i32 0
%44 = extractelement <2 x float> %42, i32 1
%45 = bitcast <2 x i32> %8 to <2 x float>
%46 = extractelement <2 x float> %45, i32 0
%47 = extractelement <2 x float> %45, i32 1
%48 = bitcast <3 x i32> %9 to <3 x float>
%49 = extractelement <3 x float> %48, i32 0
%50 = extractelement <3 x float> %48, i32 1
%51 = extractelement <3 x float> %48, i32 2
%52 = bitcast <2 x i32> %10 to <2 x float>
%53 = extractelement <2 x float> %52, i32 0
%54 = extractelement <2 x float> %52, i32 1
%55 = bitcast <2 x i32> %11 to <2 x float>
%56 = extractelement <2 x float> %55, i32 0
%57 = extractelement <2 x float> %55, i32 1
%58 = bitcast <2 x i32> %12 to <2 x float>
%59 = extractelement <2 x float> %58, i32 0
%60 = extractelement <2 x float> %58, i32 1
%61 = bitcast i32 %18 to float
%62 = bitcast i32 %19 to float
%63 = bitcast i32 %21 to float
; Rejoin the pointer halves into the four pointer operands for @main.
%64 = insertelement <2 x i32> undef, i32 %24, i32 0
%65 = insertelement <2 x i32> %64, i32 %25, i32 1
%66 = bitcast <2 x i32> %65 to i64
%67 = inttoptr i64 %66 to [12 x <4 x i32>] addrspace(2)*
%68 = insertelement <2 x i32> undef, i32 %28, i32 0
%69 = insertelement <2 x i32> %68, i32 %29, i32 1
%70 = bitcast <2 x i32> %69 to i64
%71 = inttoptr i64 %70 to [0 x <8 x i32>] addrspace(2)*
%72 = insertelement <2 x i32> undef, i32 %32, i32 0
%73 = insertelement <2 x i32> %72, i32 %33, i32 1
%74 = bitcast <2 x i32> %73 to i64
%75 = inttoptr i64 %74 to [0 x <4 x i32>] addrspace(2)*
%76 = insertelement <2 x i32> undef, i32 %36, i32 0
%77 = insertelement <2 x i32> %76, i32 %37, i32 1
%78 = bitcast <2 x i32> %77 to i64
%79 = inttoptr i64 %78 to [80 x <8 x i32>] addrspace(2)*
; Rejoin the scalar floats into the vector and scalar operand types of
; @main's signature.
%80 = bitcast i32 %38 to float
%81 = insertelement <2 x float> undef, float %40, i32 0
%82 = insertelement <2 x float> %81, float %41, i32 1
%83 = bitcast <2 x float> %82 to <2 x i32>
%84 = insertelement <2 x float> undef, float %43, i32 0
%85 = insertelement <2 x float> %84, float %44, i32 1
%86 = bitcast <2 x float> %85 to <2 x i32>
%87 = insertelement <2 x float> undef, float %46, i32 0
%88 = insertelement <2 x float> %87, float %47, i32 1
%89 = bitcast <2 x float> %88 to <2 x i32>
%90 = insertelement <3 x float> undef, float %49, i32 0
%91 = insertelement <3 x float> %90, float %50, i32 1
%92 = insertelement <3 x float> %91, float %51, i32 2
%93 = bitcast <3 x float> %92 to <3 x i32>
%94 = insertelement <2 x float> undef, float %53, i32 0
%95 = insertelement <2 x float> %94, float %54, i32 1
%96 = bitcast <2 x float> %95 to <2 x i32>
%97 = insertelement <2 x float> undef, float %56, i32 0
%98 = insertelement <2 x float> %97, float %57, i32 1
%99 = bitcast <2 x float> %98 to <2 x i32>
%100 = insertelement <2 x float> undef, float %59, i32 0
%101 = insertelement <2 x float> %100, float %60, i32 1
%102 = bitcast <2 x float> %101 to <2 x i32>
%103 = bitcast float %61 to i32
%104 = bitcast float %62 to i32
%105 = bitcast float %63 to i32
; Invoke the shader body; %5, %13-%17 and %20 go through untouched.
%106 = call <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([12 x <4 x i32>] addrspace(2)* %67, [0 x <8 x i32>] addrspace(2)* %71, [0 x <4 x i32>] addrspace(2)* %75, [80 x <8 x i32>] addrspace(2)* %79, float %80, i32 %5, <2 x i32> %83, <2 x i32> %86, <2 x i32> %89, <3 x i32> %93, <2 x i32> %96, <2 x i32> %99, <2 x i32> %102, float %13, float %14, float %15, float %16, float %17, i32 %103, i32 %104, float %20, i32 %105)
; Pull out every field (0-23) of the returned packed struct.
%107 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 0
%108 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 1
%109 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 2
%110 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 3
%111 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 4
%112 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 5
%113 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 6
%114 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 7
%115 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 8
%116 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 9
%117 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 10
%118 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 11
%119 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 12
%120 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 13
%121 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 14
%122 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 15
%123 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 16
%124 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 17
%125 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 18
%126 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 19
%127 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 20
%128 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 21
%129 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 22
%130 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 23
; Fields 0-7 become four i64 values (two i32 each); field 8 is viewed as
; float. These feed @ps_epilog's five inreg parameters.
%131 = insertelement <2 x i32> undef, i32 %107, i32 0
%132 = insertelement <2 x i32> %131, i32 %108, i32 1
%133 = bitcast <2 x i32> %132 to i64
%134 = insertelement <2 x i32> undef, i32 %109, i32 0
%135 = insertelement <2 x i32> %134, i32 %110, i32 1
%136 = bitcast <2 x i32> %135 to i64
%137 = insertelement <2 x i32> undef, i32 %111, i32 0
%138 = insertelement <2 x i32> %137, i32 %112, i32 1
%139 = bitcast <2 x i32> %138 to i64
%140 = insertelement <2 x i32> undef, i32 %113, i32 0
%141 = insertelement <2 x i32> %140, i32 %114, i32 1
%142 = bitcast <2 x i32> %141 to i64
%143 = bitcast i32 %115 to float
; Fields 9-23 are passed as the fifteen plain float parameters; of these
; @ps_epilog only reads the first (%116, struct field 9).
call void @ps_epilog(i64 %133, i64 %136, i64 %139, i64 %142, float %143, float %116, float %117, float %118, float %119, float %120, float %121, float %122, float %123, float %124, float %125, float %126, float %127, float %128, float %129, float %130)
ret void
}
; Attribute groups for this module. Each group is referenced elsewhere in the
; IR by its #N number (e.g. the definition of @main carries #0).
attributes #0 = { alwaysinline "no-signed-zeros-fp-math"="true" }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind writeonly }
attributes #3 = { nounwind readnone speculatable }
attributes #4 = { alwaysinline "InitialPSInputAddr"="16777215" "no-signed-zeros-fp-math"="true" }
attributes #5 = { nounwind }
attributes #6 = { "no-signed-zeros-fp-math"="true" }
attributes #7 = { nounwind readnone }
attributes #8 = { inaccessiblememonly nounwind }
; Empty metadata node.
!0 = !{}
radeonsi: Compiling shader 8
SHADER KEY
part.ps.prolog.color_two_side = 0
part.ps.prolog.flatshade_colors = 0
part.ps.prolog.poly_stipple = 0
part.ps.prolog.force_persp_sample_interp = 0
part.ps.prolog.force_linear_sample_interp = 0
part.ps.prolog.force_persp_center_interp = 0
part.ps.prolog.force_linear_center_interp = 0
part.ps.prolog.bc_optimize_for_persp = 0
part.ps.prolog.bc_optimize_for_linear = 0
part.ps.epilog.spi_shader_col_format = 0x1
part.ps.epilog.color_is_int8 = 0x0
part.ps.epilog.color_is_int10 = 0x0
part.ps.epilog.last_cbuf = 0
part.ps.epilog.alpha_func = 7
part.ps.epilog.alpha_to_one = 0
part.ps.epilog.poly_line_smoothing = 0
part.ps.epilog.clamp_color = 0
Pixel Shader:
Shader main disassembly:
; Disassembly of the pixel shader main part, in three basic blocks:
;   BB0_0 - setup: load two 8-dword descriptors, initialise the counters
;   BB0_1 - loop body: one image_load followed by two image_stores
;   BB0_2 - tail: interpolate attr0.x, export it to mrt0, end the program
; The hex words after ';' on each instruction line are presumably the
; instruction encodings printed by the disassembler.
; NOTE(review): the image_load sits inside the loop in this listing; the
; surrounding mail mentions a "good" and a "bad" disassembly but this listing
; is not labelled here - confirm which of the two it is.
wrapper:
BB0_0:
; Load two 8-dword blocks from the address held in s[6:7]:
;   offset 0x1c0 -> s[20:27], used below as the image_store resource operand
;   offset 0x1e0 -> s[12:19], used below as the image_load resource operand
s_load_dwordx8 s[20:27], s[6:7], 0x1c0 ; C00E0503 000001C0
s_load_dwordx8 s[12:19], s[6:7], 0x1e0 ; C00E0303 000001E0
; s0 = iteration counter (starts at 0, +1 per iteration).
; s1 = store coordinate base (starts at 1, +2 per iteration).
s_mov_b32 s0, 0 ; BE800080
s_mov_b32 s1, 1 ; BE810081
; v0 = 16 is the address operand of every image_load; it is never changed
; inside the loop.
v_mov_b32_e32 v0, 16 ; 7E000290
; Wait for the two scalar loads above before touching s26.
s_waitcnt lgkmcnt(0) ; BF8C007F
; Clear bit 21 (mask 0xffdfffff) in dword 6 of the block loaded into s[20:27].
; NOTE(review): the meaning of that bit is not visible here - check the
; image resource descriptor layout.
s_and_b32 s26, s26, 0xffdfffff ; 861AFF1A FFDFFFFF
; Loop pre-check: skip the loop if s0 > 0x2ff. s0 is 0 at this point.
s_cmpk_gt_i32 s0, 0x2ff ; B20002FF
s_cbranch_scc1 BB0_2 ; BF850000
BB0_1:
; Loop body. Runs until s0 exceeds 0x2ff, i.e. 0x300 (768) iterations.
; Read four dwords into v[1:4] from address v0 through resource s[12:19].
image_load v[1:4], v0, s[12:19] dmask:0xf unorm ; F0001F00 00030100
; v9 = s1 - 1: address operand of the first store.
s_add_i32 s2, s1, -1 ; 8102C101
v_mov_b32_e32 v9, s2 ; 7E120202
; Advance the iteration counter.
s_add_i32 s0, s0, 1 ; 81008100
; Wait for the image_load result before using v[1:4].
s_waitcnt vmcnt(0) ; BF8C0F70
; v[5:8] = v[1:4] + 1, component by component.
v_add_u32_e32 v5, vcc, 1, v1 ; 320A0281
v_add_u32_e32 v6, vcc, 1, v2 ; 320C0481
v_add_u32_e32 v7, vcc, 1, v3 ; 320E0681
v_add_u32_e32 v8, vcc, 1, v4 ; 32100881
; First store: write v[5:8] at address v9 (= s1 - 1) through resource s[20:27].
image_store v[5:8], v9, s[20:27] dmask:0xf unorm ; F0201F00 00050509
; v5 is overwritten right after this wait; presumably the wait guarantees the
; store above has already read its data registers - confirm against the ISA
; documentation.
s_waitcnt expcnt(0) ; BF8C0F0F
; v5 = s1: address operand of the second store.
v_mov_b32_e32 v5, s1 ; 7E0A0201
; v[1:4] = v[1:4] - 1, component by component (add of -1).
v_add_u32_e32 v1, vcc, -1, v1 ; 320202C1
v_add_u32_e32 v2, vcc, -1, v2 ; 320404C1
v_add_u32_e32 v3, vcc, -1, v3 ; 320606C1
v_add_u32_e32 v4, vcc, -1, v4 ; 320808C1
; Second store: write v[1:4] at address v5 (= s1) through resource s[20:27].
image_store v[1:4], v5, s[20:27] dmask:0xf unorm ; F0201F00 00050105
; Step the store coordinate base by 2 (two stores per iteration).
s_add_i32 s1, s1, 2 ; 81018201
; Loop back while s0 <= 0x2ff.
s_cmpk_gt_i32 s0, 0x2ff ; B20002FF
s_cbranch_scc0 BB0_1 ; BF840000
BB0_2:
; Tail: m0 = s9, then v0 = attr0.x via v_interp_mov_f32 with the p0 selector.
s_mov_b32 m0, s9 ; BEFC0009
v_interp_mov_f32 v0, p0, attr0.x ; D4020002
; Export v0 to mrt0 (the other three components are off) with the done and
; vm flags set, then end the program.
exp mrt0 v0, off, off, off done vm ; C4001801 00000000
s_endpgm ; BF810000
*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0x0001
SPI_PS_INPUT_ENA = 0x0001
*** SHADER STATS ***
SGPRS: 96
VGPRS: 12
Spilled SGPRs: 0
Spilled VGPRs: 0
Private memory VGPRs: 0
Code Size: 160 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 8
********************
More information about the llvm-commits mailing list