[llvm] r324554 - AMDGPU: Fix incorrect reordering when inline asm defines LDS address
Michel Dänzer via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 8 09:35:50 PST 2018
Hi Matt,
On 2018-02-08 02:56 AM, Matt Arsenault via llvm-commits wrote:
> Author: arsenm
> Date: Wed Feb 7 17:56:14 2018
> New Revision: 324554
>
> URL: http://llvm.org/viewvc/llvm-project?rev=324554&view=rev
> Log:
> AMDGPU: Fix incorrect reordering when inline asm defines LDS address
>
> Defs of operands outside of the instruction's explicit defs need
> to be checked.
This change broke a bunch of
spec@arb_gpu_shader_int64@execution@built-in-functions piglit tests with
radeonsi. I'm attaching good and bad code for one of them.
--
Earthling Michel Dänzer | http://www.amd.com
Libre software enthusiast | Mesa and X developer
-------------- next part --------------
; ModuleID = 'tgsi'
source_filename = "tgsi"
target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
target triple = "amdgcn--"
; Function Attrs: alwaysinline
; Main part of the pixel shader; it is called from @wrapper.
;
; It reads nine 64-bit integers from the buffer addressed by argument %2,
; computes three signed 64-bit remainders (srem), compares each remainder with
; an expected value taken from the same buffer, and returns a four-component
; result in return fields 9..12: (0, 1, 0, 1) when all three comparisons match
; and (1, 0, 0, 1) otherwise.
; Return field 8 carries argument %4 (bitcast to i32) and field 23 carries
; argument %20; every other return field is left undef.
;
; Every constant fetch rebuilds the same <4 x i32> operand: the low and high
; dwords of pointer %2 followed by the constants 96 and 163756 (0x27fac).
; NOTE(review): these look like the size and format words of a buffer
; descriptor, and the second operand of llvm.SI.load.const looks like a byte
; offset (consecutive dwords are 4 apart) - confirm against the intrinsic.
; The module is little-endian (datalayout starts with "e"), so element 0 of a
; <2 x i32> is the low half of the i64 it is bitcast to.
define private amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x float] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
; One float slot in address space 5 per component of TEMP0..TEMP5 and OUT0.
%TEMP5.w = alloca float, addrspace(5)
%TEMP5.z = alloca float, addrspace(5)
%TEMP5.y = alloca float, addrspace(5)
%TEMP5.x = alloca float, addrspace(5)
%TEMP4.w = alloca float, addrspace(5)
%TEMP4.z = alloca float, addrspace(5)
%TEMP4.y = alloca float, addrspace(5)
%TEMP4.x = alloca float, addrspace(5)
%TEMP3.w = alloca float, addrspace(5)
%TEMP3.z = alloca float, addrspace(5)
%TEMP3.y = alloca float, addrspace(5)
%TEMP3.x = alloca float, addrspace(5)
%TEMP2.w = alloca float, addrspace(5)
%TEMP2.z = alloca float, addrspace(5)
%TEMP2.y = alloca float, addrspace(5)
%TEMP2.x = alloca float, addrspace(5)
%TEMP1.w = alloca float, addrspace(5)
%TEMP1.z = alloca float, addrspace(5)
%TEMP1.y = alloca float, addrspace(5)
%TEMP1.x = alloca float, addrspace(5)
%TEMP0.w = alloca float, addrspace(5)
%TEMP0.z = alloca float, addrspace(5)
%TEMP0.y = alloca float, addrspace(5)
%TEMP0.x = alloca float, addrspace(5)
%OUT0.w = alloca float, addrspace(5)
%OUT0.z = alloca float, addrspace(5)
%OUT0.y = alloca float, addrspace(5)
%OUT0.x = alloca float, addrspace(5)
; ---- TEMP0.xy = srem(i64 at offset 0, i64 at offset 32) ----
; Low dword of the dividend (offset 0).
%22 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%23 = bitcast i64 %22 to <2 x i32>
%24 = extractelement <2 x i32> %23, i32 0
%25 = extractelement <2 x i32> %23, i32 1
%26 = insertelement <4 x i32> undef, i32 %24, i32 0
%27 = insertelement <4 x i32> %26, i32 %25, i32 1
%28 = insertelement <4 x i32> %27, i32 96, i32 2
%29 = insertelement <4 x i32> %28, i32 163756, i32 3
%30 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 0)
%31 = bitcast float %30 to i32
; High dword of the dividend (offset 4).
%32 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%33 = bitcast i64 %32 to <2 x i32>
%34 = extractelement <2 x i32> %33, i32 0
%35 = extractelement <2 x i32> %33, i32 1
%36 = insertelement <4 x i32> undef, i32 %34, i32 0
%37 = insertelement <4 x i32> %36, i32 %35, i32 1
%38 = insertelement <4 x i32> %37, i32 96, i32 2
%39 = insertelement <4 x i32> %38, i32 163756, i32 3
%40 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %39, i32 4)
%41 = bitcast float %40 to i32
; Combine both dwords into the i64 dividend.
%42 = insertelement <2 x i32> undef, i32 %31, i32 0
%43 = insertelement <2 x i32> %42, i32 %41, i32 1
%44 = bitcast <2 x i32> %43 to i64
; Low dword of the divisor (offset 32).
%45 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%46 = bitcast i64 %45 to <2 x i32>
%47 = extractelement <2 x i32> %46, i32 0
%48 = extractelement <2 x i32> %46, i32 1
%49 = insertelement <4 x i32> undef, i32 %47, i32 0
%50 = insertelement <4 x i32> %49, i32 %48, i32 1
%51 = insertelement <4 x i32> %50, i32 96, i32 2
%52 = insertelement <4 x i32> %51, i32 163756, i32 3
%53 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %52, i32 32)
%54 = bitcast float %53 to i32
; High dword of the divisor (offset 36).
%55 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%56 = bitcast i64 %55 to <2 x i32>
%57 = extractelement <2 x i32> %56, i32 0
%58 = extractelement <2 x i32> %56, i32 1
%59 = insertelement <4 x i32> undef, i32 %57, i32 0
%60 = insertelement <4 x i32> %59, i32 %58, i32 1
%61 = insertelement <4 x i32> %60, i32 96, i32 2
%62 = insertelement <4 x i32> %61, i32 163756, i32 3
%63 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %62, i32 36)
%64 = bitcast float %63 to i32
; Combine both dwords into the i64 divisor.
%65 = insertelement <2 x i32> undef, i32 %54, i32 0
%66 = insertelement <2 x i32> %65, i32 %64, i32 1
%67 = bitcast <2 x i32> %66 to i64
; Signed 64-bit remainder; low dword goes to TEMP0.x, high dword to TEMP0.y.
%68 = srem i64 %44, %67
%69 = bitcast i64 %68 to <2 x i32>
%70 = extractelement <2 x i32> %69, i32 0
%71 = extractelement <2 x i32> %69, i32 1
%72 = bitcast i32 %70 to float
store float %72, float addrspace(5)* %TEMP0.x
%73 = bitcast i32 %71 to float
store float %73, float addrspace(5)* %TEMP0.y
; ---- TEMP0.zw = srem(i64 at offset 8, i64 at offset 40) ----
; Low dword of the dividend (offset 8).
%74 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%75 = bitcast i64 %74 to <2 x i32>
%76 = extractelement <2 x i32> %75, i32 0
%77 = extractelement <2 x i32> %75, i32 1
%78 = insertelement <4 x i32> undef, i32 %76, i32 0
%79 = insertelement <4 x i32> %78, i32 %77, i32 1
%80 = insertelement <4 x i32> %79, i32 96, i32 2
%81 = insertelement <4 x i32> %80, i32 163756, i32 3
%82 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %81, i32 8)
%83 = bitcast float %82 to i32
; High dword of the dividend (offset 12).
%84 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%85 = bitcast i64 %84 to <2 x i32>
%86 = extractelement <2 x i32> %85, i32 0
%87 = extractelement <2 x i32> %85, i32 1
%88 = insertelement <4 x i32> undef, i32 %86, i32 0
%89 = insertelement <4 x i32> %88, i32 %87, i32 1
%90 = insertelement <4 x i32> %89, i32 96, i32 2
%91 = insertelement <4 x i32> %90, i32 163756, i32 3
%92 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %91, i32 12)
%93 = bitcast float %92 to i32
; Combine both dwords into the i64 dividend.
%94 = insertelement <2 x i32> undef, i32 %83, i32 0
%95 = insertelement <2 x i32> %94, i32 %93, i32 1
%96 = bitcast <2 x i32> %95 to i64
; Low dword of the divisor (offset 40).
%97 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%98 = bitcast i64 %97 to <2 x i32>
%99 = extractelement <2 x i32> %98, i32 0
%100 = extractelement <2 x i32> %98, i32 1
%101 = insertelement <4 x i32> undef, i32 %99, i32 0
%102 = insertelement <4 x i32> %101, i32 %100, i32 1
%103 = insertelement <4 x i32> %102, i32 96, i32 2
%104 = insertelement <4 x i32> %103, i32 163756, i32 3
%105 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %104, i32 40)
%106 = bitcast float %105 to i32
; High dword of the divisor (offset 44).
%107 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%108 = bitcast i64 %107 to <2 x i32>
%109 = extractelement <2 x i32> %108, i32 0
%110 = extractelement <2 x i32> %108, i32 1
%111 = insertelement <4 x i32> undef, i32 %109, i32 0
%112 = insertelement <4 x i32> %111, i32 %110, i32 1
%113 = insertelement <4 x i32> %112, i32 96, i32 2
%114 = insertelement <4 x i32> %113, i32 163756, i32 3
%115 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %114, i32 44)
%116 = bitcast float %115 to i32
; Combine both dwords into the i64 divisor.
%117 = insertelement <2 x i32> undef, i32 %106, i32 0
%118 = insertelement <2 x i32> %117, i32 %116, i32 1
%119 = bitcast <2 x i32> %118 to i64
; Signed 64-bit remainder; low dword goes to TEMP0.z, high dword to TEMP0.w.
%120 = srem i64 %96, %119
%121 = bitcast i64 %120 to <2 x i32>
%122 = extractelement <2 x i32> %121, i32 0
%123 = extractelement <2 x i32> %121, i32 1
%124 = bitcast i32 %122 to float
store float %124, float addrspace(5)* %TEMP0.z
%125 = bitcast i32 %123 to float
store float %125, float addrspace(5)* %TEMP0.w
; ---- TEMP1.xy = srem(i64 at offset 16, i64 at offset 48) ----
; Low dword of the dividend (offset 16).
%126 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%127 = bitcast i64 %126 to <2 x i32>
%128 = extractelement <2 x i32> %127, i32 0
%129 = extractelement <2 x i32> %127, i32 1
%130 = insertelement <4 x i32> undef, i32 %128, i32 0
%131 = insertelement <4 x i32> %130, i32 %129, i32 1
%132 = insertelement <4 x i32> %131, i32 96, i32 2
%133 = insertelement <4 x i32> %132, i32 163756, i32 3
%134 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %133, i32 16)
%135 = bitcast float %134 to i32
; High dword of the dividend (offset 20).
%136 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%137 = bitcast i64 %136 to <2 x i32>
%138 = extractelement <2 x i32> %137, i32 0
%139 = extractelement <2 x i32> %137, i32 1
%140 = insertelement <4 x i32> undef, i32 %138, i32 0
%141 = insertelement <4 x i32> %140, i32 %139, i32 1
%142 = insertelement <4 x i32> %141, i32 96, i32 2
%143 = insertelement <4 x i32> %142, i32 163756, i32 3
%144 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %143, i32 20)
%145 = bitcast float %144 to i32
; Combine both dwords into the i64 dividend.
%146 = insertelement <2 x i32> undef, i32 %135, i32 0
%147 = insertelement <2 x i32> %146, i32 %145, i32 1
%148 = bitcast <2 x i32> %147 to i64
; Low dword of the divisor (offset 48).
%149 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%150 = bitcast i64 %149 to <2 x i32>
%151 = extractelement <2 x i32> %150, i32 0
%152 = extractelement <2 x i32> %150, i32 1
%153 = insertelement <4 x i32> undef, i32 %151, i32 0
%154 = insertelement <4 x i32> %153, i32 %152, i32 1
%155 = insertelement <4 x i32> %154, i32 96, i32 2
%156 = insertelement <4 x i32> %155, i32 163756, i32 3
%157 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %156, i32 48)
%158 = bitcast float %157 to i32
; High dword of the divisor (offset 52).
%159 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%160 = bitcast i64 %159 to <2 x i32>
%161 = extractelement <2 x i32> %160, i32 0
%162 = extractelement <2 x i32> %160, i32 1
%163 = insertelement <4 x i32> undef, i32 %161, i32 0
%164 = insertelement <4 x i32> %163, i32 %162, i32 1
%165 = insertelement <4 x i32> %164, i32 96, i32 2
%166 = insertelement <4 x i32> %165, i32 163756, i32 3
%167 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %166, i32 52)
%168 = bitcast float %167 to i32
; Combine both dwords into the i64 divisor.
%169 = insertelement <2 x i32> undef, i32 %158, i32 0
%170 = insertelement <2 x i32> %169, i32 %168, i32 1
%171 = bitcast <2 x i32> %170 to i64
; Signed 64-bit remainder; low dword goes to TEMP1.x, high dword to TEMP1.y.
%172 = srem i64 %148, %171
%173 = bitcast i64 %172 to <2 x i32>
%174 = extractelement <2 x i32> %173, i32 0
%175 = extractelement <2 x i32> %173, i32 1
%176 = bitcast i32 %174 to float
store float %176, float addrspace(5)* %TEMP1.x
%177 = bitcast i32 %175 to float
store float %177, float addrspace(5)* %TEMP1.y
; ---- TEMP2.x = (TEMP0.xy == i64 at offset 64) ? -1 : 0 ----
; Reassemble the first remainder from TEMP0.x (low) and TEMP0.y (high).
%178 = load float, float addrspace(5)* %TEMP0.y
%179 = load float, float addrspace(5)* %TEMP0.x
%180 = bitcast float %179 to i32
%181 = insertelement <2 x i32> undef, i32 %180, i32 0
%182 = bitcast float %178 to i32
%183 = insertelement <2 x i32> %181, i32 %182, i32 1
%184 = bitcast <2 x i32> %183 to i64
; Low dword of the expected value (offset 64).
%185 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%186 = bitcast i64 %185 to <2 x i32>
%187 = extractelement <2 x i32> %186, i32 0
%188 = extractelement <2 x i32> %186, i32 1
%189 = insertelement <4 x i32> undef, i32 %187, i32 0
%190 = insertelement <4 x i32> %189, i32 %188, i32 1
%191 = insertelement <4 x i32> %190, i32 96, i32 2
%192 = insertelement <4 x i32> %191, i32 163756, i32 3
%193 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %192, i32 64)
%194 = bitcast float %193 to i32
; High dword of the expected value (offset 68).
%195 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%196 = bitcast i64 %195 to <2 x i32>
%197 = extractelement <2 x i32> %196, i32 0
%198 = extractelement <2 x i32> %196, i32 1
%199 = insertelement <4 x i32> undef, i32 %197, i32 0
%200 = insertelement <4 x i32> %199, i32 %198, i32 1
%201 = insertelement <4 x i32> %200, i32 96, i32 2
%202 = insertelement <4 x i32> %201, i32 163756, i32 3
%203 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %202, i32 68)
%204 = bitcast float %203 to i32
%205 = insertelement <2 x i32> undef, i32 %194, i32 0
%206 = insertelement <2 x i32> %205, i32 %204, i32 1
%207 = bitcast <2 x i32> %206 to i64
; Equality test, sign-extended so that "equal" is all ones and "different" is 0.
%208 = icmp eq i64 %184, %207
%209 = sext i1 %208 to i32
%210 = bitcast i32 %209 to float
store float %210, float addrspace(5)* %TEMP2.x
; ---- TEMP2.y = (TEMP0.zw == i64 at offset 72) ? -1 : 0 ----
; Reassemble the second remainder from TEMP0.z (low) and TEMP0.w (high).
%211 = load float, float addrspace(5)* %TEMP0.w
%212 = load float, float addrspace(5)* %TEMP0.z
%213 = bitcast float %212 to i32
%214 = insertelement <2 x i32> undef, i32 %213, i32 0
%215 = bitcast float %211 to i32
%216 = insertelement <2 x i32> %214, i32 %215, i32 1
%217 = bitcast <2 x i32> %216 to i64
; Low dword of the expected value (offset 72).
%218 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%219 = bitcast i64 %218 to <2 x i32>
%220 = extractelement <2 x i32> %219, i32 0
%221 = extractelement <2 x i32> %219, i32 1
%222 = insertelement <4 x i32> undef, i32 %220, i32 0
%223 = insertelement <4 x i32> %222, i32 %221, i32 1
%224 = insertelement <4 x i32> %223, i32 96, i32 2
%225 = insertelement <4 x i32> %224, i32 163756, i32 3
%226 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %225, i32 72)
%227 = bitcast float %226 to i32
; High dword of the expected value (offset 76).
%228 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%229 = bitcast i64 %228 to <2 x i32>
%230 = extractelement <2 x i32> %229, i32 0
%231 = extractelement <2 x i32> %229, i32 1
%232 = insertelement <4 x i32> undef, i32 %230, i32 0
%233 = insertelement <4 x i32> %232, i32 %231, i32 1
%234 = insertelement <4 x i32> %233, i32 96, i32 2
%235 = insertelement <4 x i32> %234, i32 163756, i32 3
%236 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %235, i32 76)
%237 = bitcast float %236 to i32
%238 = insertelement <2 x i32> undef, i32 %227, i32 0
%239 = insertelement <2 x i32> %238, i32 %237, i32 1
%240 = bitcast <2 x i32> %239 to i64
; Equality test, sign-extended to an all-ones / zero mask.
%241 = icmp eq i64 %217, %240
%242 = sext i1 %241 to i32
%243 = bitcast i32 %242 to float
store float %243, float addrspace(5)* %TEMP2.y
; ---- TEMP2.z = (TEMP1.xy == i64 at offset 80) ? -1 : 0 ----
; Reassemble the third remainder from TEMP1.x (low) and TEMP1.y (high).
%244 = load float, float addrspace(5)* %TEMP1.y
%245 = load float, float addrspace(5)* %TEMP1.x
%246 = bitcast float %245 to i32
%247 = insertelement <2 x i32> undef, i32 %246, i32 0
%248 = bitcast float %244 to i32
%249 = insertelement <2 x i32> %247, i32 %248, i32 1
%250 = bitcast <2 x i32> %249 to i64
; Low dword of the expected value (offset 80).
%251 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%252 = bitcast i64 %251 to <2 x i32>
%253 = extractelement <2 x i32> %252, i32 0
%254 = extractelement <2 x i32> %252, i32 1
%255 = insertelement <4 x i32> undef, i32 %253, i32 0
%256 = insertelement <4 x i32> %255, i32 %254, i32 1
%257 = insertelement <4 x i32> %256, i32 96, i32 2
%258 = insertelement <4 x i32> %257, i32 163756, i32 3
%259 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %258, i32 80)
%260 = bitcast float %259 to i32
; High dword of the expected value (offset 84).
%261 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%262 = bitcast i64 %261 to <2 x i32>
%263 = extractelement <2 x i32> %262, i32 0
%264 = extractelement <2 x i32> %262, i32 1
%265 = insertelement <4 x i32> undef, i32 %263, i32 0
%266 = insertelement <4 x i32> %265, i32 %264, i32 1
%267 = insertelement <4 x i32> %266, i32 96, i32 2
%268 = insertelement <4 x i32> %267, i32 163756, i32 3
%269 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %268, i32 84)
%270 = bitcast float %269 to i32
%271 = insertelement <2 x i32> undef, i32 %260, i32 0
%272 = insertelement <2 x i32> %271, i32 %270, i32 1
%273 = bitcast <2 x i32> %272 to i64
; Equality test, sign-extended to an all-ones / zero mask.
%274 = icmp eq i64 %250, %273
%275 = sext i1 %274 to i32
%276 = bitcast i32 %275 to float
store float %276, float addrspace(5)* %TEMP2.z
; TEMP2.y = TEMP2.y & TEMP2.z (second and third comparison both matched).
%277 = load float, float addrspace(5)* %TEMP2.y
%278 = bitcast float %277 to i32
%279 = load float, float addrspace(5)* %TEMP2.z
%280 = bitcast float %279 to i32
%281 = and i32 %278, %280
%282 = bitcast i32 %281 to float
store float %282, float addrspace(5)* %TEMP2.y
; TEMP3.x = TEMP2.x & TEMP2.y: all ones only when all three comparisons matched.
%283 = load float, float addrspace(5)* %TEMP2.x
%284 = bitcast float %283 to i32
%285 = load float, float addrspace(5)* %TEMP2.y
%286 = bitcast float %285 to i32
%287 = and i32 %284, %286
%288 = bitcast i32 %287 to float
store float %288, float addrspace(5)* %TEMP3.x
; If TEMP3.x != 0, select (0, 1, 0, 1); otherwise keep the current TEMP4 value.
; TEMP4.* has not been stored to yet, so the "else" operands are undefined
; here. They never reach the result: when TEMP3.x is 0 the second select
; below replaces all four components with constants.
%289 = load float, float addrspace(5)* %TEMP3.x
%290 = load float, float addrspace(5)* %TEMP4.x
%291 = bitcast float %289 to i32
%292 = icmp ne i32 %291, 0
%293 = select i1 %292, float 0.000000e+00, float %290
%294 = load float, float addrspace(5)* %TEMP3.x
%295 = load float, float addrspace(5)* %TEMP4.y
%296 = bitcast float %294 to i32
%297 = icmp ne i32 %296, 0
%298 = select i1 %297, float 1.000000e+00, float %295
%299 = load float, float addrspace(5)* %TEMP3.x
%300 = load float, float addrspace(5)* %TEMP4.z
%301 = bitcast float %299 to i32
%302 = icmp ne i32 %301, 0
%303 = select i1 %302, float 0.000000e+00, float %300
%304 = load float, float addrspace(5)* %TEMP3.x
%305 = load float, float addrspace(5)* %TEMP4.w
%306 = bitcast float %304 to i32
%307 = icmp ne i32 %306, 0
%308 = select i1 %307, float 1.000000e+00, float %305
; Write the four selected components back to TEMP4.
store float %293, float addrspace(5)* %TEMP4.x
store float %298, float addrspace(5)* %TEMP4.y
store float %303, float addrspace(5)* %TEMP4.z
store float %308, float addrspace(5)* %TEMP4.w
; TEMP5.x = ~TEMP3.x: non-zero when at least one comparison failed.
%309 = load float, float addrspace(5)* %TEMP3.x
%310 = bitcast float %309 to i32
%311 = xor i32 %310, -1
%312 = bitcast i32 %311 to float
store float %312, float addrspace(5)* %TEMP5.x
; If TEMP5.x != 0, select (1, 0, 0, 1); otherwise keep TEMP4 from above.
%313 = load float, float addrspace(5)* %TEMP5.x
%314 = load float, float addrspace(5)* %TEMP4.x
%315 = bitcast float %313 to i32
%316 = icmp ne i32 %315, 0
%317 = select i1 %316, float 1.000000e+00, float %314
%318 = load float, float addrspace(5)* %TEMP5.x
%319 = load float, float addrspace(5)* %TEMP4.y
%320 = bitcast float %318 to i32
%321 = icmp ne i32 %320, 0
%322 = select i1 %321, float 0.000000e+00, float %319
%323 = load float, float addrspace(5)* %TEMP5.x
%324 = load float, float addrspace(5)* %TEMP4.z
%325 = bitcast float %323 to i32
%326 = icmp ne i32 %325, 0
%327 = select i1 %326, float 0.000000e+00, float %324
%328 = load float, float addrspace(5)* %TEMP5.x
%329 = load float, float addrspace(5)* %TEMP4.w
%330 = bitcast float %328 to i32
%331 = icmp ne i32 %330, 0
%332 = select i1 %331, float 1.000000e+00, float %329
; Write the final four components back to TEMP4.
store float %317, float addrspace(5)* %TEMP4.x
store float %322, float addrspace(5)* %TEMP4.y
store float %327, float addrspace(5)* %TEMP4.z
store float %332, float addrspace(5)* %TEMP4.w
; Copy TEMP4 to OUT0.
%333 = load float, float addrspace(5)* %TEMP4.x
%334 = load float, float addrspace(5)* %TEMP4.y
%335 = load float, float addrspace(5)* %TEMP4.z
%336 = load float, float addrspace(5)* %TEMP4.w
store float %333, float addrspace(5)* %OUT0.x
store float %334, float addrspace(5)* %OUT0.y
store float %335, float addrspace(5)* %OUT0.z
store float %336, float addrspace(5)* %OUT0.w
; Reload OUT0 for the return value.
%337 = load float, float addrspace(5)* %OUT0.x
%338 = load float, float addrspace(5)* %OUT0.y
%339 = load float, float addrspace(5)* %OUT0.z
%340 = load float, float addrspace(5)* %OUT0.w
; Build the return struct: field 8 = argument %4 as i32, fields 9..12 =
; OUT0.x/y/z/w, field 23 = argument %20. The remaining fields stay undef.
%341 = bitcast float %4 to i32
%342 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %341, 8
%343 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %342, float %337, 9
%344 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %343, float %338, 10
%345 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %344, float %339, 11
%346 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %345, float %340, 12
%347 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %346, float %20, 23
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %347
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
; Function Attrs: alwaysinline
; Pixel-shader epilogue, called from @wrapper.
; Of its twenty arguments only %5..%8 are used: they are packed pairwise into
; two <2 x half> values and passed to a single compressed export.
define private amdgpu_ps void @ps_epilog(i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #2 {
main_body:
; Pack %5 and %6 into one <2 x half>; the i32 -> float bitcasts only carry the
; 32 packed bits along and do not convert anything.
%20 = call nsz <2 x half> @llvm.amdgcn.cvt.pkrtz(float %5, float %6) #1
%21 = bitcast <2 x half> %20 to i32
%22 = bitcast i32 %21 to float
; Pack %7 and %8 the same way.
%23 = call nsz <2 x half> @llvm.amdgcn.cvt.pkrtz(float %7, float %8) #1
%24 = bitcast <2 x half> %23 to i32
%25 = bitcast i32 %24 to float
; Reinterpret both packed values as <2 x i16>, the operand type of the export.
%26 = bitcast float %22 to <2 x i16>
%27 = bitcast float %25 to <2 x i16>
; Export both packed pairs with leading operands 0 and 15 and both i1 flags
; true. NOTE(review): these are presumably target, enable mask, "done" and
; "valid mask" - confirm against the llvm.amdgcn.exp.compr definition.
call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 15, <2 x i16> %26, <2 x i16> %27, i1 true, i1 true) #4
ret void
}
; Function Attrs: nounwind readnone speculatable
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #3
; Function Attrs: nounwind
declare void @llvm.amdgcn.exp.compr.v2i16(i32, i32, <2 x i16>, <2 x i16>, i1, i1) #4
; Shader entry point: forwards its 22 arguments to @main and passes @main's 24
; return fields on to @ps_epilog.
; Before the call, most arguments are taken apart and put back together through
; ptrtoint / bitcast / extractelement / insertelement round trips that leave
; their values unchanged; %5, %13..%17 and %20 are forwarded directly.
define amdgpu_ps void @wrapper([0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x float] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
main_body:
; Split each pointer argument (%0..%3) into its two 32-bit halves.
%22 = ptrtoint [0 x <4 x i32>] addrspace(2)* %0 to i64
%23 = bitcast i64 %22 to <2 x i32>
%24 = extractelement <2 x i32> %23, i32 0
%25 = extractelement <2 x i32> %23, i32 1
%26 = ptrtoint [0 x <8 x i32>] addrspace(2)* %1 to i64
%27 = bitcast i64 %26 to <2 x i32>
%28 = extractelement <2 x i32> %27, i32 0
%29 = extractelement <2 x i32> %27, i32 1
%30 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%31 = bitcast i64 %30 to <2 x i32>
%32 = extractelement <2 x i32> %31, i32 0
%33 = extractelement <2 x i32> %31, i32 1
%34 = ptrtoint [0 x <8 x i32>] addrspace(2)* %3 to i64
%35 = bitcast i64 %34 to <2 x i32>
%36 = extractelement <2 x i32> %35, i32 0
%37 = extractelement <2 x i32> %35, i32 1
; Reinterpret the remaining arguments: %4 as i32, the vectors %6..%12 as
; individual floats, and the integers %18, %19 and %21 as floats.
%38 = bitcast float %4 to i32
%39 = bitcast <2 x i32> %6 to <2 x float>
%40 = extractelement <2 x float> %39, i32 0
%41 = extractelement <2 x float> %39, i32 1
%42 = bitcast <2 x i32> %7 to <2 x float>
%43 = extractelement <2 x float> %42, i32 0
%44 = extractelement <2 x float> %42, i32 1
%45 = bitcast <2 x i32> %8 to <2 x float>
%46 = extractelement <2 x float> %45, i32 0
%47 = extractelement <2 x float> %45, i32 1
%48 = bitcast <3 x i32> %9 to <3 x float>
%49 = extractelement <3 x float> %48, i32 0
%50 = extractelement <3 x float> %48, i32 1
%51 = extractelement <3 x float> %48, i32 2
%52 = bitcast <2 x i32> %10 to <2 x float>
%53 = extractelement <2 x float> %52, i32 0
%54 = extractelement <2 x float> %52, i32 1
%55 = bitcast <2 x i32> %11 to <2 x float>
%56 = extractelement <2 x float> %55, i32 0
%57 = extractelement <2 x float> %55, i32 1
%58 = bitcast <2 x i32> %12 to <2 x float>
%59 = extractelement <2 x float> %58, i32 0
%60 = extractelement <2 x float> %58, i32 1
%61 = bitcast i32 %18 to float
%62 = bitcast i32 %19 to float
%63 = bitcast i32 %21 to float
; Rebuild the four pointers from their halves.
%64 = insertelement <2 x i32> undef, i32 %24, i32 0
%65 = insertelement <2 x i32> %64, i32 %25, i32 1
%66 = bitcast <2 x i32> %65 to i64
%67 = inttoptr i64 %66 to [0 x <4 x i32>] addrspace(2)*
%68 = insertelement <2 x i32> undef, i32 %28, i32 0
%69 = insertelement <2 x i32> %68, i32 %29, i32 1
%70 = bitcast <2 x i32> %69 to i64
%71 = inttoptr i64 %70 to [0 x <8 x i32>] addrspace(2)*
%72 = insertelement <2 x i32> undef, i32 %32, i32 0
%73 = insertelement <2 x i32> %72, i32 %33, i32 1
%74 = bitcast <2 x i32> %73 to i64
%75 = inttoptr i64 %74 to [0 x float] addrspace(2)*
%76 = insertelement <2 x i32> undef, i32 %36, i32 0
%77 = insertelement <2 x i32> %76, i32 %37, i32 1
%78 = bitcast <2 x i32> %77 to i64
%79 = inttoptr i64 %78 to [0 x <8 x i32>] addrspace(2)*
; Rebuild the remaining arguments in the types @main expects.
%80 = bitcast i32 %38 to float
%81 = insertelement <2 x float> undef, float %40, i32 0
%82 = insertelement <2 x float> %81, float %41, i32 1
%83 = bitcast <2 x float> %82 to <2 x i32>
%84 = insertelement <2 x float> undef, float %43, i32 0
%85 = insertelement <2 x float> %84, float %44, i32 1
%86 = bitcast <2 x float> %85 to <2 x i32>
%87 = insertelement <2 x float> undef, float %46, i32 0
%88 = insertelement <2 x float> %87, float %47, i32 1
%89 = bitcast <2 x float> %88 to <2 x i32>
%90 = insertelement <3 x float> undef, float %49, i32 0
%91 = insertelement <3 x float> %90, float %50, i32 1
%92 = insertelement <3 x float> %91, float %51, i32 2
%93 = bitcast <3 x float> %92 to <3 x i32>
%94 = insertelement <2 x float> undef, float %53, i32 0
%95 = insertelement <2 x float> %94, float %54, i32 1
%96 = bitcast <2 x float> %95 to <2 x i32>
%97 = insertelement <2 x float> undef, float %56, i32 0
%98 = insertelement <2 x float> %97, float %57, i32 1
%99 = bitcast <2 x float> %98 to <2 x i32>
%100 = insertelement <2 x float> undef, float %59, i32 0
%101 = insertelement <2 x float> %100, float %60, i32 1
%102 = bitcast <2 x float> %101 to <2 x i32>
%103 = bitcast float %61 to i32
%104 = bitcast float %62 to i32
%105 = bitcast float %63 to i32
; Run the main shader part with the reassembled arguments.
%106 = call <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([0 x <4 x i32>] addrspace(2)* %67, [0 x <8 x i32>] addrspace(2)* %71, [0 x float] addrspace(2)* %75, [0 x <8 x i32>] addrspace(2)* %79, float %80, i32 %5, <2 x i32> %83, <2 x i32> %86, <2 x i32> %89, <3 x i32> %93, <2 x i32> %96, <2 x i32> %99, <2 x i32> %102, float %13, float %14, float %15, float %16, float %17, i32 %103, i32 %104, float %20, i32 %105)
; Unpack all 24 fields of the returned struct.
%107 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 0
%108 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 1
%109 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 2
%110 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 3
%111 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 4
%112 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 5
%113 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 6
%114 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 7
%115 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 8
%116 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 9
%117 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 10
%118 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 11
%119 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 12
%120 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 13
%121 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 14
%122 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 15
%123 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 16
%124 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 17
%125 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 18
%126 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 19
%127 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 20
%128 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 21
%129 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 22
%130 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 23
; Pair return fields 0..7 into four i64 values (even field = low half) and
; reinterpret field 8 as a float, matching @ps_epilog's first five parameters.
%131 = insertelement <2 x i32> undef, i32 %107, i32 0
%132 = insertelement <2 x i32> %131, i32 %108, i32 1
%133 = bitcast <2 x i32> %132 to i64
%134 = insertelement <2 x i32> undef, i32 %109, i32 0
%135 = insertelement <2 x i32> %134, i32 %110, i32 1
%136 = bitcast <2 x i32> %135 to i64
%137 = insertelement <2 x i32> undef, i32 %111, i32 0
%138 = insertelement <2 x i32> %137, i32 %112, i32 1
%139 = bitcast <2 x i32> %138 to i64
%140 = insertelement <2 x i32> undef, i32 %113, i32 0
%141 = insertelement <2 x i32> %140, i32 %114, i32 1
%142 = bitcast <2 x i32> %141 to i64
%143 = bitcast i32 %115 to float
; Hand everything to the epilogue; return fields 9..12 (the values @main
; loads from OUT0.x/y/z/w) arrive there as arguments %5..%8.
call void @ps_epilog(i64 %133, i64 %136, i64 %139, i64 %142, float %143, float %116, float %117, float %118, float %119, float %120, float %121, float %122, float %123, float %124, float %125, float %126, float %127, float %128, float %129, float %130)
ret void
}
attributes #0 = { alwaysinline "no-signed-zeros-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { alwaysinline "InitialPSInputAddr"="16777215" "no-signed-zeros-fp-math"="true" }
attributes #3 = { nounwind readnone speculatable }
attributes #4 = { nounwind }
attributes #5 = { "no-signed-zeros-fp-math"="true" }
radeonsi: Compiling shader 4
SHADER KEY
part.ps.prolog.color_two_side = 0
part.ps.prolog.flatshade_colors = 0
part.ps.prolog.poly_stipple = 0
part.ps.prolog.force_persp_sample_interp = 0
part.ps.prolog.force_linear_sample_interp = 0
part.ps.prolog.force_persp_center_interp = 0
part.ps.prolog.force_linear_center_interp = 0
part.ps.prolog.bc_optimize_for_persp = 0
part.ps.prolog.bc_optimize_for_linear = 0
part.ps.epilog.spi_shader_col_format = 0x4
part.ps.epilog.color_is_int8 = 0x0
part.ps.epilog.color_is_int10 = 0x0
part.ps.epilog.last_cbuf = 0
part.ps.epilog.alpha_func = 7
part.ps.epilog.alpha_to_one = 0
part.ps.epilog.poly_line_smoothing = 0
part.ps.epilog.clamp_color = 0
Pixel Shader:
Shader main disassembly:
; Disassembly of the final pixel shader: one straight-line basic block (BB0_0)
; that ends in s_endpgm. Visible structure:
;   * six s_buffer_load_* reads through the descriptor in s[4:7];
;   * three long mul/add/sub sequences, each started by an
;     s_ashr_i32/s_add_u32/s_addc_u32/s_xor_b64 prologue on a 64-bit SGPR pair
;     (presumably the expansion of the `srem i64` operations in the accompanying
;     IR -- confirm against the IR of this attachment);
;   * three v_cmp_eq_u64 compares whose masks are ANDed together and turned into
;     the exported colour.
; The trailing "; XXXXXXXX [XXXXXXXX]" on each instruction is its encoding as
; printed by the disassembler.
wrapper:
BB0_0:
; s6/s7 complete the buffer descriptor s[4:7]: 0x60 = 96 and 0x27fac = 163756.
s_mov_b32 s7, 0x27fac ; BE8703FF 00027FAC
s_movk_i32 s6, 0x60 ; B0060060
; Float bit patterns: v2 = 2^32, v3 = just below 2^64, v4 = 2^-32, v5 = -2^32.
; They are reused by all three v_rcp_f32 based sequences below.
v_mov_b32_e32 v2, 0x4f800000 ; 7E0402FF 4F800000
v_mov_b32_e32 v3, 0x5f7ffffc ; 7E0602FF 5F7FFFFC
v_mov_b32_e32 v4, 0x2f800000 ; 7E0802FF 2F800000
v_mov_b32_e32 v5, 0xcf800000 ; 7E0A02FF CF800000
s_buffer_load_dwordx4 s[8:11], s[4:7], 0x0 ; C2840500
s_buffer_load_dwordx2 s[12:13], s[4:7], 0x4 ; C2460504
s_buffer_load_dwordx4 s[16:19], s[4:7], 0x8 ; C2880508
; v10 stays zero and serves as the zero addend in the v_addc_u32 carry steps.
v_mov_b32_e32 v10, 0 ; 7E140280
s_waitcnt lgkmcnt(0) ; BF8C007F
; First sequence, operating on s[8:9] and s[16:17].
; s14 = sign mask of s9. The s_add_u32 below produces the low half of
; s[8:9] + s[14:15] and leaves its carry-out in SCC for the matching
; s_addc_u32 s3, s9, s14, which only appears much further down.
s_ashr_i32 s14, s9, 31 ; 910E9F09
s_add_u32 s2, s8, s14 ; 80020E08
s_ashr_i32 s0, s17, 31 ; 91009F11
s_add_u32 s16, s16, s0 ; 80100010
s_mov_b32 s1, s0 ; BE810300
s_addc_u32 s17, s17, s0 ; 82110011
s_xor_b64 s[16:17], s[16:17], s[0:1] ; 89900010
v_cvt_f32_u32_e32 v0, s16 ; 7E000C10
v_cvt_f32_u32_e32 v1, s17 ; 7E020C11
s_sub_u32 s3, 0, s16 ; 80831080
s_subb_u32 s8, 0, s17 ; 82881180
s_mov_b32 s15, s14 ; BE8F030E
v_mac_f32_e32 v0, v1, v2 ; 3E000501
v_rcp_f32_e32 v0, v0 ; 7E005500
v_mul_f32_e32 v0, v0, v3 ; 10000700
v_mul_f32_e32 v1, v0, v4 ; 10020900
v_trunc_f32_e32 v1, v1 ; 7E024301
v_mac_f32_e32 v0, v1, v5 ; 3E000B01
v_cvt_u32_f32_e32 v0, v0 ; 7E000F00
v_cvt_u32_f32_e32 v1, v1 ; 7E020F01
v_mul_hi_u32 v6, s3, v0 ; D2D40006 00020003
v_mul_lo_i32 v7, s3, v1 ; D2D60007 00020203
v_mul_lo_i32 v8, s8, v0 ; D2D60008 00020008
v_add_i32_e32 v6, vcc, v6, v7 ; 4A0C0F06
v_mul_lo_i32 v7, s3, v0 ; D2D60007 00020003
v_add_i32_e32 v6, vcc, v6, v8 ; 4A0C1106
v_mul_lo_i32 v8, v0, v6 ; D2D60008 00020D00
v_mul_hi_u32 v11, v0, v6 ; D2D4000B 00020D00
v_mul_hi_u32 v9, v0, v7 ; D2D40009 00020F00
v_mul_hi_u32 v12, v1, v6 ; D2D4000C 00020D01
v_mul_lo_i32 v6, v1, v6 ; D2D60006 00020D01
v_add_i32_e32 v8, vcc, v9, v8 ; 4A101109
v_addc_u32_e32 v9, vcc, 0, v11, vcc ; 50121680
v_mul_lo_i32 v11, v1, v7 ; D2D6000B 00020F01
v_mul_hi_u32 v7, v1, v7 ; D2D40007 00020F01
v_add_i32_e32 v8, vcc, v11, v8 ; 4A10110B
v_addc_u32_e32 v7, vcc, v9, v7, vcc ; 500E0F09
v_addc_u32_e32 v8, vcc, v12, v10, vcc ; 5010150C
v_add_i32_e32 v6, vcc, v7, v6 ; 4A0C0D07
v_add_i32_e64 v0, s[0:1], v0, v6 ; D24A0000 00020D00
v_addc_u32_e32 v7, vcc, 0, v8, vcc ; 500E1080
v_addc_u32_e64 v6, vcc, v1, v7, s[0:1] ; D2506A06 00020F01
v_mul_lo_i32 v8, s3, v6 ; D2D60008 00020C03
v_mul_hi_u32 v9, s3, v0 ; D2D40009 00020003
v_mul_lo_i32 v11, s8, v0 ; D2D6000B 00020008
v_add_i32_e32 v8, vcc, v9, v8 ; 4A101109
v_mul_lo_i32 v9, s3, v0 ; D2D60009 00020003
v_add_i32_e32 v8, vcc, v11, v8 ; 4A10110B
v_mul_lo_i32 v13, v0, v8 ; D2D6000D 00021100
v_mul_hi_u32 v15, v0, v8 ; D2D4000F 00021100
v_mul_hi_u32 v14, v0, v9 ; D2D4000E 00021300
v_mul_hi_u32 v12, v6, v9 ; D2D4000C 00021306
v_mul_lo_i32 v9, v6, v9 ; D2D60009 00021306
v_mul_hi_u32 v11, v6, v8 ; D2D4000B 00021106
v_add_i32_e32 v13, vcc, v14, v13 ; 4A1A1B0E
v_addc_u32_e32 v14, vcc, 0, v15, vcc ; 501C1E80
v_mul_lo_i32 v6, v6, v8 ; D2D60006 00021106
v_add_i32_e32 v9, vcc, v9, v13 ; 4A121B09
v_addc_u32_e32 v9, vcc, v14, v12, vcc ; 5012190E
v_addc_u32_e32 v8, vcc, v11, v10, vcc ; 5010150B
v_add_i32_e32 v6, vcc, v9, v6 ; 4A0C0D09
v_addc_u32_e32 v8, vcc, 0, v8, vcc ; 50101080
v_add_i32_e32 v1, vcc, v1, v7 ; 4A020F01
v_addc_u32_e64 v1, vcc, v1, v8, s[0:1] ; D2506A01 00021101
; NOTE(review): this s_addc_u32 is the high half of the s_add_u32 s2, s8, s14
; issued near the top of the block, but SCC no longer holds that carry. Between
; the two, s_ashr_i32 s0, s_add_u32 s16, s_addc_u32 s17, s_xor_b64 s[16:17],
; s_sub_u32 s3 and s_subb_u32 s8 have all executed; each of these writes SCC
; per the GCN ISA, so the carry-in used here is the borrow left by
; s_subb_u32 s8, 0, s17. s[2:3] (and therefore s[8:9] after the s_xor_b64 below)
; can be off by one in the high dword. This looks like the miscompile reported
; in the mail, i.e. this would be the "bad" dump -- confirm against the other
; attachment.
s_addc_u32 s3, s9, s14 ; 82030E09
v_add_i32_e32 v0, vcc, v0, v6 ; 4A000D00
s_xor_b64 s[8:9], s[2:3], s[14:15] ; 89880E02
v_addc_u32_e32 v1, vcc, 0, v1, vcc ; 50020280
v_mul_lo_i32 v6, s8, v1 ; D2D60006 00020208
v_mul_hi_u32 v7, s8, v0 ; D2D40007 00020008
v_mul_hi_u32 v9, s8, v1 ; D2D40009 00020208
v_mul_hi_u32 v11, s9, v1 ; D2D4000B 00020209
v_mul_lo_i32 v1, s9, v1 ; D2D60001 00020209
v_add_i32_e32 v6, vcc, v7, v6 ; 4A0C0D07
v_addc_u32_e32 v7, vcc, 0, v9, vcc ; 500E1280
v_mul_lo_i32 v9, s9, v0 ; D2D60009 00020009
v_mul_hi_u32 v0, s9, v0 ; D2D40000 00020009
v_mov_b32_e32 v8, s14 ; 7E10020E
v_add_i32_e32 v6, vcc, v9, v6 ; 4A0C0D09
v_addc_u32_e32 v0, vcc, v7, v0, vcc ; 50000107
v_addc_u32_e32 v6, vcc, v11, v10, vcc ; 500C150B
v_add_i32_e32 v0, vcc, v0, v1 ; 4A000300
v_addc_u32_e32 v1, vcc, 0, v6, vcc ; 50020C80
v_mul_lo_i32 v1, s16, v1 ; D2D60001 00020210
v_mul_hi_u32 v6, s16, v0 ; D2D40006 00020010
v_mul_lo_i32 v9, s17, v0 ; D2D60009 00020011
v_mul_lo_i32 v0, s16, v0 ; D2D60000 00020010
v_mov_b32_e32 v7, s17 ; 7E0E0211
v_add_i32_e32 v1, vcc, v6, v1 ; 4A020306
v_add_i32_e32 v1, vcc, v1, v9 ; 4A021301
v_sub_i32_e32 v6, vcc, s9, v1 ; 4C0C0209
v_sub_i32_e32 v0, vcc, s8, v0 ; 4C000008
v_subb_u32_e64 v6, s[0:1], v6, v7, vcc ; D2520006 01AA0F06
v_subrev_i32_e64 v9, s[0:1], s16, v0 ; D24E0009 00020010
v_subb_u32_e64 v7, s[2:3], v6, v7, s[0:1] ; D2520207 00020F06
v_subb_u32_e64 v6, s[0:1], v6, 0, s[0:1] ; D2520006 00010106
v_cmp_le_u32_e64 s[0:1], s17, v6 ; D1860000 00020C11
v_cndmask_b32_e64 v11, 0, -1, s[0:1] ; D200000B 00018280
v_cmp_le_u32_e64 s[0:1], s16, v9 ; D1860000 00021210
v_cndmask_b32_e64 v12, 0, -1, s[0:1] ; D200000C 00018280
v_cmp_eq_u32_e64 s[0:1], s17, v6 ; D1840000 00020C11
v_cndmask_b32_e64 v11, v11, v12, s[0:1] ; D200000B 0002190B
v_subrev_i32_e64 v12, s[0:1], s16, v9 ; D24E000C 00021210
; Second sequence starts here, interleaved with the tail of the first:
; it operates on s[10:11] and s[18:19].
s_ashr_i32 s2, s19, 31 ; 91029F13
v_subb_u32_e64 v7, s[0:1], v7, 0, s[0:1] ; D2520007 00010107
v_cmp_ne_u32_e64 s[0:1], 0, v11 ; D18A0000 00021680
s_add_u32 s8, s18, s2 ; 80080212
v_cndmask_b32_e64 v6, v6, v7, s[0:1] ; D2000006 00020F06
v_mov_b32_e32 v7, s9 ; 7E0E0209
s_mov_b32 s3, s2 ; BE830302
s_addc_u32 s9, s19, s2 ; 82090213
s_xor_b64 s[20:21], s[8:9], s[2:3] ; 89940208
v_subb_u32_e32 v1, vcc, v7, v1, vcc ; 52020307
v_cvt_f32_u32_e32 v7, s20 ; 7E0E0C14
v_cvt_f32_u32_e32 v11, s21 ; 7E160C15
v_cmp_le_u32_e32 vcc, s17, v1 ; 7D860211
v_cndmask_b32_e64 v13, 0, -1, vcc ; D200000D 01A98280
v_cmp_le_u32_e32 vcc, s16, v0 ; 7D860010
v_mac_f32_e32 v7, v11, v2 ; 3E0E050B
v_rcp_f32_e32 v7, v7 ; 7E0E5507
v_cndmask_b32_e64 v14, 0, -1, vcc ; D200000E 01A98280
v_cmp_eq_u32_e32 vcc, s17, v1 ; 7D840211
v_cndmask_b32_e32 v11, v13, v14, vcc ; 00161D0D
v_cmp_ne_u32_e64 s[2:3], 0, v11 ; D18A0002 00021680
v_cndmask_b32_e64 v1, v1, v6, s[2:3] ; D2000001 000A0D01
v_mul_f32_e32 v6, v7, v3 ; 100C0707
v_mul_f32_e32 v7, v6, v4 ; 100E0906
v_trunc_f32_e32 v7, v7 ; 7E0E4307
v_mac_f32_e32 v6, v7, v5 ; 3E0C0B07
v_cvt_u32_f32_e32 v6, v6 ; 7E0C0F06
v_cvt_u32_f32_e32 v7, v7 ; 7E0E0F07
s_sub_u32 s8, 0, s20 ; 80881480
v_cndmask_b32_e64 v9, v9, v12, s[0:1] ; D2000009 00021909
v_mul_hi_u32 v11, s8, v6 ; D2D4000B 00020C08
v_mul_lo_i32 v12, s8, v7 ; D2D6000C 00020E08
s_subb_u32 s9, 0, s21 ; 82891580
v_mul_lo_i32 v13, s9, v6 ; D2D6000D 00020C09
v_cndmask_b32_e64 v0, v0, v9, s[2:3] ; D2000000 000A1300
v_add_i32_e32 v11, vcc, v11, v12 ; 4A16190B
v_mul_lo_i32 v12, s8, v6 ; D2D6000C 00020C08
v_add_i32_e32 v11, vcc, v11, v13 ; 4A161B0B
v_mul_lo_i32 v13, v6, v11 ; D2D6000D 00021706
v_mul_hi_u32 v9, v6, v11 ; D2D40009 00021706
v_mul_hi_u32 v14, v6, v12 ; D2D4000E 00021906
v_mul_hi_u32 v15, v7, v11 ; D2D4000F 00021707
v_mul_lo_i32 v11, v7, v11 ; D2D6000B 00021707
v_xor_b32_e32 v0, s14, v0 ; 3A00000E
v_add_i32_e32 v13, vcc, v14, v13 ; 4A1A1B0E
v_mul_lo_i32 v14, v7, v12 ; D2D6000E 00021907
v_mul_hi_u32 v12, v7, v12 ; D2D4000C 00021907
v_addc_u32_e32 v9, vcc, 0, v9, vcc ; 50121280
v_xor_b32_e32 v1, s14, v1 ; 3A02020E
v_add_i32_e32 v13, vcc, v14, v13 ; 4A1A1B0E
v_addc_u32_e32 v9, vcc, v9, v12, vcc ; 50121909
v_addc_u32_e32 v12, vcc, v15, v10, vcc ; 5018150F
v_add_i32_e32 v9, vcc, v9, v11 ; 4A121709
v_add_i32_e64 v6, s[0:1], v6, v9 ; D24A0006 00021306
v_addc_u32_e32 v11, vcc, 0, v12, vcc ; 50161880
v_addc_u32_e64 v9, vcc, v7, v11, s[0:1] ; D2506A09 00021707
v_mul_lo_i32 v12, s8, v9 ; D2D6000C 00021208
v_mul_hi_u32 v13, s8, v6 ; D2D4000D 00020C08
v_mul_lo_i32 v14, s9, v6 ; D2D6000E 00020C09
v_add_i32_e32 v12, vcc, v13, v12 ; 4A18190D
v_mul_lo_i32 v13, s8, v6 ; D2D6000D 00020C08
v_add_i32_e32 v12, vcc, v14, v12 ; 4A18190E
v_mul_lo_i32 v16, v6, v12 ; D2D60010 00021906
v_mul_hi_u32 v18, v6, v12 ; D2D40012 00021906
v_mul_hi_u32 v17, v6, v13 ; D2D40011 00021B06
v_mul_hi_u32 v15, v9, v13 ; D2D4000F 00021B09
v_mul_lo_i32 v13, v9, v13 ; D2D6000D 00021B09
v_mul_hi_u32 v14, v9, v12 ; D2D4000E 00021909
v_add_i32_e32 v16, vcc, v17, v16 ; 4A202111
v_addc_u32_e32 v17, vcc, 0, v18, vcc ; 50222480
v_mul_lo_i32 v9, v9, v12 ; D2D60009 00021909
v_add_i32_e32 v13, vcc, v13, v16 ; 4A1A210D
v_addc_u32_e32 v13, vcc, v17, v15, vcc ; 501A1F11
v_addc_u32_e32 v12, vcc, v14, v10, vcc ; 5018150E
v_add_i32_e32 v9, vcc, v13, v9 ; 4A12130D
v_addc_u32_e32 v12, vcc, 0, v12, vcc ; 50181880
v_add_i32_e32 v7, vcc, v7, v11 ; 4A0E1707
s_ashr_i32 s8, s11, 31 ; 91089F0B
v_addc_u32_e64 v7, vcc, v7, v12, s[0:1] ; D2506A07 00021907
; Here the s_add_u32/s_addc_u32 pair is separated only by a VALU add and an
; s_mov_b32, so no other SCC-writing scalar ALU instruction sits between them.
s_add_u32 s0, s10, s8 ; 8000080A
v_add_i32_e32 v6, vcc, v6, v9 ; 4A0C1306
s_mov_b32 s9, s8 ; BE890308
s_addc_u32 s1, s11, s8 ; 8201080B
s_xor_b64 s[10:11], s[0:1], s[8:9] ; 898A0800
v_addc_u32_e32 v7, vcc, 0, v7, vcc ; 500E0E80
v_mul_lo_i32 v9, s10, v7 ; D2D60009 00020E0A
v_mul_hi_u32 v11, s10, v6 ; D2D4000B 00020C0A
v_mul_hi_u32 v12, s10, v7 ; D2D4000C 00020E0A
v_mul_hi_u32 v13, s11, v7 ; D2D4000D 00020E0B
v_mul_lo_i32 v7, s11, v7 ; D2D60007 00020E0B
v_add_i32_e32 v9, vcc, v11, v9 ; 4A12130B
v_addc_u32_e32 v11, vcc, 0, v12, vcc ; 50161880
v_mul_lo_i32 v12, s11, v6 ; D2D6000C 00020C0B
v_mul_hi_u32 v6, s11, v6 ; D2D40006 00020C0B
v_add_i32_e32 v9, vcc, v12, v9 ; 4A12130C
v_addc_u32_e32 v6, vcc, v11, v6, vcc ; 500C0D0B
v_addc_u32_e32 v9, vcc, v13, v10, vcc ; 5012150D
v_add_i32_e32 v6, vcc, v6, v7 ; 4A0C0F06
v_addc_u32_e32 v7, vcc, 0, v9, vcc ; 500E1280
v_subrev_i32_e32 v0, vcc, s14, v0 ; 4E00000E
v_mul_lo_i32 v7, s20, v7 ; D2D60007 00020E14
v_mul_hi_u32 v9, s20, v6 ; D2D40009 00020C14
v_subb_u32_e32 v1, vcc, v1, v8, vcc ; 52021101
v_mul_lo_i32 v8, s21, v6 ; D2D60008 00020C15
v_mul_lo_i32 v6, s20, v6 ; D2D60006 00020C14
v_add_i32_e32 v7, vcc, v9, v7 ; 4A0E0F09
v_mov_b32_e32 v9, s21 ; 7E120215
v_add_i32_e32 v7, vcc, v7, v8 ; 4A0E1107
v_sub_i32_e32 v8, vcc, s11, v7 ; 4C100E0B
v_sub_i32_e32 v6, vcc, s10, v6 ; 4C0C0C0A
v_subb_u32_e64 v8, s[0:1], v8, v9, vcc ; D2520008 01AA1308
v_subrev_i32_e64 v11, s[0:1], s20, v6 ; D24E000B 00020C14
v_subb_u32_e64 v9, s[2:3], v8, v9, s[0:1] ; D2520209 00021308
v_subb_u32_e64 v8, s[0:1], v8, 0, s[0:1] ; D2520008 00010108
s_buffer_load_dwordx2 s[2:3], s[4:7], 0xc ; C241050C
v_cmp_le_u32_e64 s[0:1], s21, v8 ; D1860000 00021015
v_cndmask_b32_e64 v12, 0, -1, s[0:1] ; D200000C 00018280
v_cmp_le_u32_e64 s[0:1], s20, v11 ; D1860000 00021614
v_cndmask_b32_e64 v13, 0, -1, s[0:1] ; D200000D 00018280
v_cmp_eq_u32_e64 s[0:1], s21, v8 ; D1840000 00021015
v_cndmask_b32_e64 v12, v12, v13, s[0:1] ; D200000C 00021B0C
v_subrev_i32_e64 v13, s[0:1], s20, v11 ; D24E000D 00021614
v_subb_u32_e64 v9, s[0:1], v9, 0, s[0:1] ; D2520009 00010109
; s[16:19] is reloaded here; its lanes are consumed by the v_cmp_eq_u64
; instructions at the end of the block.
s_buffer_load_dwordx4 s[16:19], s[4:7], 0x10 ; C2880510
s_waitcnt lgkmcnt(0) ; BF8C007F
; Third sequence: operates on s[12:13] and the freshly loaded s[2:3].
s_ashr_i32 s10, s3, 31 ; 910A9F03
s_add_u32 s2, s2, s10 ; 80020A02
v_cmp_ne_u32_e64 s[0:1], 0, v12 ; D18A0000 00021880
v_cndmask_b32_e64 v8, v8, v9, s[0:1] ; D2000008 00021308
v_mov_b32_e32 v9, s11 ; 7E12020B
s_mov_b32 s11, s10 ; BE8B030A
s_addc_u32 s3, s3, s10 ; 82030A03
s_xor_b64 s[10:11], s[2:3], s[10:11] ; 898A0A02
v_cvt_f32_u32_e32 v12, s10 ; 7E180C0A
v_cvt_f32_u32_e32 v14, s11 ; 7E1C0C0B
v_subb_u32_e32 v7, vcc, v9, v7, vcc ; 520E0F09
v_cmp_le_u32_e32 vcc, s21, v7 ; 7D860E15
v_cndmask_b32_e64 v9, 0, -1, vcc ; D2000009 01A98280
v_mac_f32_e32 v12, v14, v2 ; 3E18050E
v_rcp_f32_e32 v2, v12 ; 7E04550C
v_cmp_le_u32_e32 vcc, s20, v6 ; 7D860C14
v_cndmask_b32_e64 v15, 0, -1, vcc ; D200000F 01A98280
v_cmp_eq_u32_e32 vcc, s21, v7 ; 7D840E15
v_mul_f32_e32 v2, v2, v3 ; 10040702
v_mul_f32_e32 v3, v2, v4 ; 10060902
v_trunc_f32_e32 v3, v3 ; 7E064303
v_mac_f32_e32 v2, v3, v5 ; 3E040B03
v_cvt_u32_f32_e32 v2, v2 ; 7E040F02
v_cvt_u32_f32_e32 v3, v3 ; 7E060F03
v_cndmask_b32_e32 v9, v9, v15, vcc ; 00121F09
v_cmp_ne_u32_e32 vcc, 0, v9 ; 7D8A1280
v_cndmask_b32_e32 v7, v7, v8, vcc ; 000E1107
s_sub_u32 s2, 0, s10 ; 80820A80
v_cndmask_b32_e64 v8, v11, v13, s[0:1] ; D2000008 00021B0B
v_cndmask_b32_e32 v4, v6, v8, vcc ; 00081106
v_mul_hi_u32 v5, s2, v2 ; D2D40005 00020402
v_mul_lo_i32 v6, s2, v3 ; D2D60006 00020602
s_subb_u32 s3, 0, s11 ; 82830B80
v_mul_lo_i32 v8, s3, v2 ; D2D60008 00020403
s_ashr_i32 s14, s13, 31 ; 910E9F0D
v_add_i32_e32 v5, vcc, v5, v6 ; 4A0A0D05
v_mul_lo_i32 v6, s2, v2 ; D2D60006 00020402
v_add_i32_e32 v5, vcc, v5, v8 ; 4A0A1105
v_mul_lo_i32 v8, v2, v5 ; D2D60008 00020B02
v_mul_hi_u32 v11, v2, v5 ; D2D4000B 00020B02
v_mul_hi_u32 v9, v2, v6 ; D2D40009 00020D02
v_mul_hi_u32 v12, v3, v5 ; D2D4000C 00020B03
v_mul_lo_i32 v5, v3, v5 ; D2D60005 00020B03
s_mov_b32 s15, s14 ; BE8F030E
v_add_i32_e32 v8, vcc, v9, v8 ; 4A101109
v_addc_u32_e32 v9, vcc, 0, v11, vcc ; 50121680
v_mul_lo_i32 v11, v3, v6 ; D2D6000B 00020D03
v_mul_hi_u32 v6, v3, v6 ; D2D40006 00020D03
v_xor_b32_e32 v4, s8, v4 ; 3A080808
v_xor_b32_e32 v7, s8, v7 ; 3A0E0E08
v_add_i32_e32 v8, vcc, v11, v8 ; 4A10110B
v_addc_u32_e32 v6, vcc, v9, v6, vcc ; 500C0D09
v_addc_u32_e32 v8, vcc, v12, v10, vcc ; 5010150C
v_add_i32_e32 v5, vcc, v6, v5 ; 4A0A0B06
v_add_i32_e64 v2, s[0:1], v2, v5 ; D24A0002 00020B02
v_addc_u32_e32 v6, vcc, 0, v8, vcc ; 500C1080
v_addc_u32_e64 v5, vcc, v3, v6, s[0:1] ; D2506A05 00020D03
v_mul_lo_i32 v8, s2, v5 ; D2D60008 00020A02
v_mul_hi_u32 v9, s2, v2 ; D2D40009 00020402
v_mul_lo_i32 v11, s3, v2 ; D2D6000B 00020403
v_add_i32_e32 v8, vcc, v9, v8 ; 4A101109
v_mul_lo_i32 v9, s2, v2 ; D2D60009 00020402
v_add_i32_e32 v8, vcc, v11, v8 ; 4A10110B
v_mul_lo_i32 v13, v2, v8 ; D2D6000D 00021102
v_mul_hi_u32 v15, v2, v8 ; D2D4000F 00021102
v_mul_hi_u32 v14, v2, v9 ; D2D4000E 00021302
v_mul_hi_u32 v12, v5, v9 ; D2D4000C 00021305
v_mul_lo_i32 v9, v5, v9 ; D2D60009 00021305
v_mul_hi_u32 v11, v5, v8 ; D2D4000B 00021105
v_add_i32_e32 v13, vcc, v14, v13 ; 4A1A1B0E
v_addc_u32_e32 v14, vcc, 0, v15, vcc ; 501C1E80
v_mul_lo_i32 v5, v5, v8 ; D2D60005 00021105
v_add_i32_e32 v9, vcc, v9, v13 ; 4A121B09
v_addc_u32_e32 v9, vcc, v14, v12, vcc ; 5012190E
v_addc_u32_e32 v8, vcc, v11, v10, vcc ; 5010150B
v_add_i32_e32 v5, vcc, v9, v5 ; 4A0A0B09
v_addc_u32_e32 v8, vcc, 0, v8, vcc ; 50101080
v_add_i32_e32 v3, vcc, v3, v6 ; 4A060D03
v_addc_u32_e64 v3, vcc, v3, v8, s[0:1] ; D2506A03 00021103
; In this sequence the 64-bit scalar add is emitted as an adjacent pair.
s_add_u32 s0, s12, s14 ; 80000E0C
s_addc_u32 s1, s13, s14 ; 82010E0D
v_add_i32_e32 v2, vcc, v2, v5 ; 4A040B02
s_xor_b64 s[12:13], s[0:1], s[14:15] ; 898C0E00
v_addc_u32_e32 v3, vcc, 0, v3, vcc ; 50060680
v_mul_lo_i32 v5, s12, v3 ; D2D60005 0002060C
v_mul_hi_u32 v6, s12, v2 ; D2D40006 0002040C
v_mul_hi_u32 v9, s12, v3 ; D2D40009 0002060C
v_mul_hi_u32 v11, s13, v3 ; D2D4000B 0002060D
v_mul_lo_i32 v3, s13, v3 ; D2D60003 0002060D
v_add_i32_e32 v5, vcc, v6, v5 ; 4A0A0B06
v_addc_u32_e32 v6, vcc, 0, v9, vcc ; 500C1280
v_mul_lo_i32 v9, s13, v2 ; D2D60009 0002040D
v_mul_hi_u32 v2, s13, v2 ; D2D40002 0002040D
v_mov_b32_e32 v8, s8 ; 7E100208
v_add_i32_e32 v5, vcc, v9, v5 ; 4A0A0B09
v_addc_u32_e32 v2, vcc, v6, v2, vcc ; 50040506
v_addc_u32_e32 v5, vcc, v11, v10, vcc ; 500A150B
v_add_i32_e32 v6, vcc, v2, v3 ; 4A0C0702
v_addc_u32_e32 v2, vcc, 0, v5, vcc ; 50040A80
v_mul_lo_i32 v5, s10, v2 ; D2D60005 0002040A
v_mul_hi_u32 v9, s10, v6 ; D2D40009 00020C0A
v_subrev_i32_e32 v2, vcc, s8, v4 ; 4E040808
v_mul_lo_i32 v4, s11, v6 ; D2D60004 00020C0B
v_subb_u32_e32 v3, vcc, v7, v8, vcc ; 52061107
v_add_i32_e32 v5, vcc, v9, v5 ; 4A0A0B09
v_mov_b32_e32 v7, s11 ; 7E0E020B
v_add_i32_e32 v4, vcc, v5, v4 ; 4A080905
v_mul_lo_i32 v5, s10, v6 ; D2D60005 00020C0A
v_sub_i32_e32 v6, vcc, s13, v4 ; 4C0C080D
v_sub_i32_e32 v5, vcc, s12, v5 ; 4C0A0A0C
v_subb_u32_e64 v6, s[0:1], v6, v7, vcc ; D2520006 01AA0F06
v_subrev_i32_e64 v8, s[0:1], s10, v5 ; D24E0008 00020A0A
v_subb_u32_e64 v7, s[2:3], v6, v7, s[0:1] ; D2520207 00020F06
v_subb_u32_e64 v6, s[0:1], v6, 0, s[0:1] ; D2520006 00010106
v_cmp_le_u32_e64 s[0:1], s11, v6 ; D1860000 00020C0B
v_cndmask_b32_e64 v9, 0, -1, s[0:1] ; D2000009 00018280
v_cmp_le_u32_e64 s[0:1], s10, v8 ; D1860000 0002100A
v_cndmask_b32_e64 v10, 0, -1, s[0:1] ; D200000A 00018280
v_cmp_eq_u32_e64 s[0:1], s11, v6 ; D1840000 00020C0B
v_cndmask_b32_e64 v9, v9, v10, s[0:1] ; D2000009 00021509
v_subrev_i32_e64 v10, s[0:1], s10, v8 ; D24E000A 0002100A
v_subb_u32_e64 v7, s[0:1], v7, 0, s[0:1] ; D2520007 00010107
v_cmp_ne_u32_e64 s[0:1], 0, v9 ; D18A0000 00021280
v_cndmask_b32_e64 v6, v6, v7, s[0:1] ; D2000006 00020F06
v_mov_b32_e32 v7, s13 ; 7E0E020D
v_subb_u32_e32 v4, vcc, v7, v4, vcc ; 52080907
v_cmp_le_u32_e32 vcc, s11, v4 ; 7D86080B
v_cndmask_b32_e64 v7, 0, -1, vcc ; D2000007 01A98280
v_cmp_le_u32_e32 vcc, s10, v5 ; 7D860A0A
v_cndmask_b32_e64 v9, 0, -1, vcc ; D2000009 01A98280
v_cmp_eq_u32_e32 vcc, s11, v4 ; 7D84080B
v_cndmask_b32_e32 v7, v7, v9, vcc ; 000E1307
v_cmp_ne_u32_e32 vcc, 0, v7 ; 7D8A0E80
v_cndmask_b32_e32 v4, v4, v6, vcc ; 00080D04
v_cndmask_b32_e64 v6, v8, v10, s[0:1] ; D2000006 00021508
s_buffer_load_dwordx2 s[0:1], s[4:7], 0x14 ; C2400514
v_cndmask_b32_e32 v5, v5, v6, vcc ; 000A0D05
v_xor_b32_e32 v5, s14, v5 ; 3A0A0A0E
v_xor_b32_e32 v6, s14, v4 ; 3A0C080E
v_mov_b32_e32 v7, s14 ; 7E0E020E
v_subrev_i32_e32 v4, vcc, s14, v5 ; 4E080A0E
v_subb_u32_e32 v5, vcc, v6, v7, vcc ; 520A0F06
; Compare the three 64-bit results v[0:1], v[2:3], v[4:5] with the loaded
; constants s[16:17], s[18:19], s[0:1] and AND the three lane masks together.
v_cmp_eq_u64_e32 vcc, s[18:19], v[2:3] ; 7DC40412
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_eq_u64_e64 s[0:1], s[0:1], v[4:5] ; D1C40000 00020800
v_cmp_eq_u64_e64 s[2:3], s[16:17], v[0:1] ; D1C40002 00020010
s_and_b64 s[0:1], vcc, s[0:1] ; 8780006A
s_and_b64 s[0:1], s[2:3], s[0:1] ; 87800002
; Per lane: v0 = 0 and v1 = 1.0 where all three compares matched, otherwise
; v0 = 1.0 and v1 = 0. Both are packed to f16 and exported to mrt0 together
; with the constant in v1 (bit-reverse of 60 = 0x3C000000).
v_cndmask_b32_e64 v0, 1.0, 0, s[0:1] ; D2000000 000100F2
v_cndmask_b32_e64 v1, 0, 1.0, s[0:1] ; D2000001 0001E480
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_bfrev_b32_e32 v1, 60 ; 7E0270BC
exp mrt0 v0, v0, v1, v1 done compr vm ; F8001C0F 00000100
s_endpgm ; BF810000
*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0x0001
SPI_PS_INPUT_ENA = 0x0001
*** SHADER STATS ***
SGPRS: 24
VGPRS: 20
Spilled SGPRs: 0
Spilled VGPRs: 0
Private memory VGPRs: 0
Code Size: 2260 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************
-------------- next part --------------
; ModuleID = 'tgsi'
source_filename = "tgsi"
; Little-endian layout ('e'): 64-bit default pointers, 32-bit pointers for
; address spaces 3, 4 and 5. The trailing 'A5' makes address space 5 the alloca
; address space, matching the `alloca float, addrspace(5)` instructions in @main.
target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
; Architecture is amdgcn; the vendor and OS fields of the triple are empty.
target triple = "amdgcn--"
; Function Attrs: alwaysinline
define private amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x float] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
%TEMP5.w = alloca float, addrspace(5)
%TEMP5.z = alloca float, addrspace(5)
%TEMP5.y = alloca float, addrspace(5)
%TEMP5.x = alloca float, addrspace(5)
%TEMP4.w = alloca float, addrspace(5)
%TEMP4.z = alloca float, addrspace(5)
%TEMP4.y = alloca float, addrspace(5)
%TEMP4.x = alloca float, addrspace(5)
%TEMP3.w = alloca float, addrspace(5)
%TEMP3.z = alloca float, addrspace(5)
%TEMP3.y = alloca float, addrspace(5)
%TEMP3.x = alloca float, addrspace(5)
%TEMP2.w = alloca float, addrspace(5)
%TEMP2.z = alloca float, addrspace(5)
%TEMP2.y = alloca float, addrspace(5)
%TEMP2.x = alloca float, addrspace(5)
%TEMP1.w = alloca float, addrspace(5)
%TEMP1.z = alloca float, addrspace(5)
%TEMP1.y = alloca float, addrspace(5)
%TEMP1.x = alloca float, addrspace(5)
%TEMP0.w = alloca float, addrspace(5)
%TEMP0.z = alloca float, addrspace(5)
%TEMP0.y = alloca float, addrspace(5)
%TEMP0.x = alloca float, addrspace(5)
%OUT0.w = alloca float, addrspace(5)
%OUT0.z = alloca float, addrspace(5)
%OUT0.y = alloca float, addrspace(5)
%OUT0.x = alloca float, addrspace(5)
%22 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%23 = bitcast i64 %22 to <2 x i32>
%24 = extractelement <2 x i32> %23, i32 0
%25 = extractelement <2 x i32> %23, i32 1
%26 = insertelement <4 x i32> undef, i32 %24, i32 0
%27 = insertelement <4 x i32> %26, i32 %25, i32 1
%28 = insertelement <4 x i32> %27, i32 96, i32 2
%29 = insertelement <4 x i32> %28, i32 163756, i32 3
%30 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 0)
%31 = bitcast float %30 to i32
%32 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%33 = bitcast i64 %32 to <2 x i32>
%34 = extractelement <2 x i32> %33, i32 0
%35 = extractelement <2 x i32> %33, i32 1
%36 = insertelement <4 x i32> undef, i32 %34, i32 0
%37 = insertelement <4 x i32> %36, i32 %35, i32 1
%38 = insertelement <4 x i32> %37, i32 96, i32 2
%39 = insertelement <4 x i32> %38, i32 163756, i32 3
%40 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %39, i32 4)
%41 = bitcast float %40 to i32
%42 = insertelement <2 x i32> undef, i32 %31, i32 0
%43 = insertelement <2 x i32> %42, i32 %41, i32 1
%44 = bitcast <2 x i32> %43 to i64
%45 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%46 = bitcast i64 %45 to <2 x i32>
%47 = extractelement <2 x i32> %46, i32 0
%48 = extractelement <2 x i32> %46, i32 1
%49 = insertelement <4 x i32> undef, i32 %47, i32 0
%50 = insertelement <4 x i32> %49, i32 %48, i32 1
%51 = insertelement <4 x i32> %50, i32 96, i32 2
%52 = insertelement <4 x i32> %51, i32 163756, i32 3
%53 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %52, i32 32)
%54 = bitcast float %53 to i32
%55 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%56 = bitcast i64 %55 to <2 x i32>
%57 = extractelement <2 x i32> %56, i32 0
%58 = extractelement <2 x i32> %56, i32 1
%59 = insertelement <4 x i32> undef, i32 %57, i32 0
%60 = insertelement <4 x i32> %59, i32 %58, i32 1
%61 = insertelement <4 x i32> %60, i32 96, i32 2
%62 = insertelement <4 x i32> %61, i32 163756, i32 3
%63 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %62, i32 36)
%64 = bitcast float %63 to i32
%65 = insertelement <2 x i32> undef, i32 %54, i32 0
%66 = insertelement <2 x i32> %65, i32 %64, i32 1
%67 = bitcast <2 x i32> %66 to i64
%68 = srem i64 %44, %67
%69 = bitcast i64 %68 to <2 x i32>
%70 = extractelement <2 x i32> %69, i32 0
%71 = extractelement <2 x i32> %69, i32 1
%72 = bitcast i32 %70 to float
store float %72, float addrspace(5)* %TEMP0.x
%73 = bitcast i32 %71 to float
store float %73, float addrspace(5)* %TEMP0.y
%74 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%75 = bitcast i64 %74 to <2 x i32>
%76 = extractelement <2 x i32> %75, i32 0
%77 = extractelement <2 x i32> %75, i32 1
%78 = insertelement <4 x i32> undef, i32 %76, i32 0
%79 = insertelement <4 x i32> %78, i32 %77, i32 1
%80 = insertelement <4 x i32> %79, i32 96, i32 2
%81 = insertelement <4 x i32> %80, i32 163756, i32 3
%82 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %81, i32 8)
%83 = bitcast float %82 to i32
%84 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%85 = bitcast i64 %84 to <2 x i32>
%86 = extractelement <2 x i32> %85, i32 0
%87 = extractelement <2 x i32> %85, i32 1
%88 = insertelement <4 x i32> undef, i32 %86, i32 0
%89 = insertelement <4 x i32> %88, i32 %87, i32 1
%90 = insertelement <4 x i32> %89, i32 96, i32 2
%91 = insertelement <4 x i32> %90, i32 163756, i32 3
%92 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %91, i32 12)
%93 = bitcast float %92 to i32
%94 = insertelement <2 x i32> undef, i32 %83, i32 0
%95 = insertelement <2 x i32> %94, i32 %93, i32 1
%96 = bitcast <2 x i32> %95 to i64
%97 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%98 = bitcast i64 %97 to <2 x i32>
%99 = extractelement <2 x i32> %98, i32 0
%100 = extractelement <2 x i32> %98, i32 1
%101 = insertelement <4 x i32> undef, i32 %99, i32 0
%102 = insertelement <4 x i32> %101, i32 %100, i32 1
%103 = insertelement <4 x i32> %102, i32 96, i32 2
%104 = insertelement <4 x i32> %103, i32 163756, i32 3
%105 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %104, i32 40)
%106 = bitcast float %105 to i32
%107 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%108 = bitcast i64 %107 to <2 x i32>
%109 = extractelement <2 x i32> %108, i32 0
%110 = extractelement <2 x i32> %108, i32 1
%111 = insertelement <4 x i32> undef, i32 %109, i32 0
%112 = insertelement <4 x i32> %111, i32 %110, i32 1
%113 = insertelement <4 x i32> %112, i32 96, i32 2
%114 = insertelement <4 x i32> %113, i32 163756, i32 3
%115 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %114, i32 44)
%116 = bitcast float %115 to i32
%117 = insertelement <2 x i32> undef, i32 %106, i32 0
%118 = insertelement <2 x i32> %117, i32 %116, i32 1
%119 = bitcast <2 x i32> %118 to i64
%120 = srem i64 %96, %119
%121 = bitcast i64 %120 to <2 x i32>
%122 = extractelement <2 x i32> %121, i32 0
%123 = extractelement <2 x i32> %121, i32 1
%124 = bitcast i32 %122 to float
store float %124, float addrspace(5)* %TEMP0.z
%125 = bitcast i32 %123 to float
store float %125, float addrspace(5)* %TEMP0.w
%126 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%127 = bitcast i64 %126 to <2 x i32>
%128 = extractelement <2 x i32> %127, i32 0
%129 = extractelement <2 x i32> %127, i32 1
%130 = insertelement <4 x i32> undef, i32 %128, i32 0
%131 = insertelement <4 x i32> %130, i32 %129, i32 1
%132 = insertelement <4 x i32> %131, i32 96, i32 2
%133 = insertelement <4 x i32> %132, i32 163756, i32 3
%134 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %133, i32 16)
%135 = bitcast float %134 to i32
%136 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%137 = bitcast i64 %136 to <2 x i32>
%138 = extractelement <2 x i32> %137, i32 0
%139 = extractelement <2 x i32> %137, i32 1
%140 = insertelement <4 x i32> undef, i32 %138, i32 0
%141 = insertelement <4 x i32> %140, i32 %139, i32 1
%142 = insertelement <4 x i32> %141, i32 96, i32 2
%143 = insertelement <4 x i32> %142, i32 163756, i32 3
%144 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %143, i32 20)
%145 = bitcast float %144 to i32
%146 = insertelement <2 x i32> undef, i32 %135, i32 0
%147 = insertelement <2 x i32> %146, i32 %145, i32 1
%148 = bitcast <2 x i32> %147 to i64
%149 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%150 = bitcast i64 %149 to <2 x i32>
%151 = extractelement <2 x i32> %150, i32 0
%152 = extractelement <2 x i32> %150, i32 1
%153 = insertelement <4 x i32> undef, i32 %151, i32 0
%154 = insertelement <4 x i32> %153, i32 %152, i32 1
%155 = insertelement <4 x i32> %154, i32 96, i32 2
%156 = insertelement <4 x i32> %155, i32 163756, i32 3
%157 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %156, i32 48)
%158 = bitcast float %157 to i32
%159 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%160 = bitcast i64 %159 to <2 x i32>
%161 = extractelement <2 x i32> %160, i32 0
%162 = extractelement <2 x i32> %160, i32 1
%163 = insertelement <4 x i32> undef, i32 %161, i32 0
%164 = insertelement <4 x i32> %163, i32 %162, i32 1
%165 = insertelement <4 x i32> %164, i32 96, i32 2
%166 = insertelement <4 x i32> %165, i32 163756, i32 3
%167 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %166, i32 52)
%168 = bitcast float %167 to i32
%169 = insertelement <2 x i32> undef, i32 %158, i32 0
%170 = insertelement <2 x i32> %169, i32 %168, i32 1
%171 = bitcast <2 x i32> %170 to i64
%172 = srem i64 %148, %171
%173 = bitcast i64 %172 to <2 x i32>
%174 = extractelement <2 x i32> %173, i32 0
%175 = extractelement <2 x i32> %173, i32 1
%176 = bitcast i32 %174 to float
store float %176, float addrspace(5)* %TEMP1.x
%177 = bitcast i32 %175 to float
store float %177, float addrspace(5)* %TEMP1.y
%178 = load float, float addrspace(5)* %TEMP0.y
%179 = load float, float addrspace(5)* %TEMP0.x
%180 = bitcast float %179 to i32
%181 = insertelement <2 x i32> undef, i32 %180, i32 0
%182 = bitcast float %178 to i32
%183 = insertelement <2 x i32> %181, i32 %182, i32 1
%184 = bitcast <2 x i32> %183 to i64
%185 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%186 = bitcast i64 %185 to <2 x i32>
%187 = extractelement <2 x i32> %186, i32 0
%188 = extractelement <2 x i32> %186, i32 1
%189 = insertelement <4 x i32> undef, i32 %187, i32 0
%190 = insertelement <4 x i32> %189, i32 %188, i32 1
%191 = insertelement <4 x i32> %190, i32 96, i32 2
%192 = insertelement <4 x i32> %191, i32 163756, i32 3
%193 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %192, i32 64)
%194 = bitcast float %193 to i32
%195 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%196 = bitcast i64 %195 to <2 x i32>
%197 = extractelement <2 x i32> %196, i32 0
%198 = extractelement <2 x i32> %196, i32 1
%199 = insertelement <4 x i32> undef, i32 %197, i32 0
%200 = insertelement <4 x i32> %199, i32 %198, i32 1
%201 = insertelement <4 x i32> %200, i32 96, i32 2
%202 = insertelement <4 x i32> %201, i32 163756, i32 3
%203 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %202, i32 68)
%204 = bitcast float %203 to i32
%205 = insertelement <2 x i32> undef, i32 %194, i32 0
%206 = insertelement <2 x i32> %205, i32 %204, i32 1
%207 = bitcast <2 x i32> %206 to i64
%208 = icmp eq i64 %184, %207
%209 = sext i1 %208 to i32
%210 = bitcast i32 %209 to float
store float %210, float addrspace(5)* %TEMP2.x
%211 = load float, float addrspace(5)* %TEMP0.w
%212 = load float, float addrspace(5)* %TEMP0.z
%213 = bitcast float %212 to i32
%214 = insertelement <2 x i32> undef, i32 %213, i32 0
%215 = bitcast float %211 to i32
%216 = insertelement <2 x i32> %214, i32 %215, i32 1
%217 = bitcast <2 x i32> %216 to i64
%218 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%219 = bitcast i64 %218 to <2 x i32>
%220 = extractelement <2 x i32> %219, i32 0
%221 = extractelement <2 x i32> %219, i32 1
%222 = insertelement <4 x i32> undef, i32 %220, i32 0
%223 = insertelement <4 x i32> %222, i32 %221, i32 1
%224 = insertelement <4 x i32> %223, i32 96, i32 2
%225 = insertelement <4 x i32> %224, i32 163756, i32 3
%226 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %225, i32 72)
%227 = bitcast float %226 to i32
%228 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%229 = bitcast i64 %228 to <2 x i32>
%230 = extractelement <2 x i32> %229, i32 0
%231 = extractelement <2 x i32> %229, i32 1
%232 = insertelement <4 x i32> undef, i32 %230, i32 0
%233 = insertelement <4 x i32> %232, i32 %231, i32 1
%234 = insertelement <4 x i32> %233, i32 96, i32 2
%235 = insertelement <4 x i32> %234, i32 163756, i32 3
%236 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %235, i32 76)
%237 = bitcast float %236 to i32
%238 = insertelement <2 x i32> undef, i32 %227, i32 0
%239 = insertelement <2 x i32> %238, i32 %237, i32 1
%240 = bitcast <2 x i32> %239 to i64
%241 = icmp eq i64 %217, %240
%242 = sext i1 %241 to i32
%243 = bitcast i32 %242 to float
store float %243, float addrspace(5)* %TEMP2.y
%244 = load float, float addrspace(5)* %TEMP1.y
%245 = load float, float addrspace(5)* %TEMP1.x
%246 = bitcast float %245 to i32
%247 = insertelement <2 x i32> undef, i32 %246, i32 0
%248 = bitcast float %244 to i32
%249 = insertelement <2 x i32> %247, i32 %248, i32 1
%250 = bitcast <2 x i32> %249 to i64
%251 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%252 = bitcast i64 %251 to <2 x i32>
%253 = extractelement <2 x i32> %252, i32 0
%254 = extractelement <2 x i32> %252, i32 1
%255 = insertelement <4 x i32> undef, i32 %253, i32 0
%256 = insertelement <4 x i32> %255, i32 %254, i32 1
%257 = insertelement <4 x i32> %256, i32 96, i32 2
%258 = insertelement <4 x i32> %257, i32 163756, i32 3
%259 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %258, i32 80)
%260 = bitcast float %259 to i32
%261 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%262 = bitcast i64 %261 to <2 x i32>
%263 = extractelement <2 x i32> %262, i32 0
%264 = extractelement <2 x i32> %262, i32 1
%265 = insertelement <4 x i32> undef, i32 %263, i32 0
%266 = insertelement <4 x i32> %265, i32 %264, i32 1
%267 = insertelement <4 x i32> %266, i32 96, i32 2
%268 = insertelement <4 x i32> %267, i32 163756, i32 3
%269 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %268, i32 84)
%270 = bitcast float %269 to i32
%271 = insertelement <2 x i32> undef, i32 %260, i32 0
%272 = insertelement <2 x i32> %271, i32 %270, i32 1
%273 = bitcast <2 x i32> %272 to i64
%274 = icmp eq i64 %250, %273
%275 = sext i1 %274 to i32
%276 = bitcast i32 %275 to float
store float %276, float addrspace(5)* %TEMP2.z
%277 = load float, float addrspace(5)* %TEMP2.y
%278 = bitcast float %277 to i32
%279 = load float, float addrspace(5)* %TEMP2.z
%280 = bitcast float %279 to i32
%281 = and i32 %278, %280
%282 = bitcast i32 %281 to float
store float %282, float addrspace(5)* %TEMP2.y
%283 = load float, float addrspace(5)* %TEMP2.x
%284 = bitcast float %283 to i32
%285 = load float, float addrspace(5)* %TEMP2.y
%286 = bitcast float %285 to i32
%287 = and i32 %284, %286
%288 = bitcast i32 %287 to float
store float %288, float addrspace(5)* %TEMP3.x
%289 = load float, float addrspace(5)* %TEMP3.x
%290 = load float, float addrspace(5)* %TEMP4.x
%291 = bitcast float %289 to i32
%292 = icmp ne i32 %291, 0
%293 = select i1 %292, float 0.000000e+00, float %290
%294 = load float, float addrspace(5)* %TEMP3.x
%295 = load float, float addrspace(5)* %TEMP4.y
%296 = bitcast float %294 to i32
%297 = icmp ne i32 %296, 0
%298 = select i1 %297, float 1.000000e+00, float %295
%299 = load float, float addrspace(5)* %TEMP3.x
%300 = load float, float addrspace(5)* %TEMP4.z
%301 = bitcast float %299 to i32
%302 = icmp ne i32 %301, 0
%303 = select i1 %302, float 0.000000e+00, float %300
%304 = load float, float addrspace(5)* %TEMP3.x
%305 = load float, float addrspace(5)* %TEMP4.w
%306 = bitcast float %304 to i32
%307 = icmp ne i32 %306, 0
%308 = select i1 %307, float 1.000000e+00, float %305
store float %293, float addrspace(5)* %TEMP4.x
store float %298, float addrspace(5)* %TEMP4.y
store float %303, float addrspace(5)* %TEMP4.z
store float %308, float addrspace(5)* %TEMP4.w
%309 = load float, float addrspace(5)* %TEMP3.x
%310 = bitcast float %309 to i32
%311 = xor i32 %310, -1
%312 = bitcast i32 %311 to float
store float %312, float addrspace(5)* %TEMP5.x
%313 = load float, float addrspace(5)* %TEMP5.x
%314 = load float, float addrspace(5)* %TEMP4.x
%315 = bitcast float %313 to i32
%316 = icmp ne i32 %315, 0
%317 = select i1 %316, float 1.000000e+00, float %314
%318 = load float, float addrspace(5)* %TEMP5.x
%319 = load float, float addrspace(5)* %TEMP4.y
%320 = bitcast float %318 to i32
%321 = icmp ne i32 %320, 0
%322 = select i1 %321, float 0.000000e+00, float %319
%323 = load float, float addrspace(5)* %TEMP5.x
%324 = load float, float addrspace(5)* %TEMP4.z
%325 = bitcast float %323 to i32
%326 = icmp ne i32 %325, 0
%327 = select i1 %326, float 0.000000e+00, float %324
%328 = load float, float addrspace(5)* %TEMP5.x
%329 = load float, float addrspace(5)* %TEMP4.w
%330 = bitcast float %328 to i32
%331 = icmp ne i32 %330, 0
%332 = select i1 %331, float 1.000000e+00, float %329
store float %317, float addrspace(5)* %TEMP4.x
store float %322, float addrspace(5)* %TEMP4.y
store float %327, float addrspace(5)* %TEMP4.z
store float %332, float addrspace(5)* %TEMP4.w
%333 = load float, float addrspace(5)* %TEMP4.x
%334 = load float, float addrspace(5)* %TEMP4.y
%335 = load float, float addrspace(5)* %TEMP4.z
%336 = load float, float addrspace(5)* %TEMP4.w
store float %333, float addrspace(5)* %OUT0.x
store float %334, float addrspace(5)* %OUT0.y
store float %335, float addrspace(5)* %OUT0.z
store float %336, float addrspace(5)* %OUT0.w
%337 = load float, float addrspace(5)* %OUT0.x
%338 = load float, float addrspace(5)* %OUT0.y
%339 = load float, float addrspace(5)* %OUT0.z
%340 = load float, float addrspace(5)* %OUT0.w
%341 = bitcast float %4 to i32
%342 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %341, 8
%343 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %342, float %337, 9
%344 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %343, float %338, 10
%345 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %344, float %339, 11
%346 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %345, float %340, 12
%347 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %346, float %20, 23
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %347
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
; Function Attrs: alwaysinline
; ps_epilog: pixel-shader epilog part. Of its 20 arguments only %5..%8 are
; read in the body; %0..%4 and %9..%19 are unused here.
;   %5,%6 -> packed into one <2 x half> with round-toward-zero (cvt.pkrtz)
;   %7,%8 -> packed into a second <2 x half>
; The two packed pairs are then handed to a single compressed export.
define private amdgpu_ps void @ps_epilog(i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #2 {
main_body:
; Pack (%5, %6) into two halves; the i32 -> float bitcasts only re-type the
; same 32 bits.
%20 = call nsz <2 x half> @llvm.amdgcn.cvt.pkrtz(float %5, float %6) #1
%21 = bitcast <2 x half> %20 to i32
%22 = bitcast i32 %21 to float
; Pack (%7, %8) the same way.
%23 = call nsz <2 x half> @llvm.amdgcn.cvt.pkrtz(float %7, float %8) #1
%24 = bitcast <2 x half> %23 to i32
%25 = bitcast i32 %24 to float
; Re-type both packed dwords as <2 x i16>, the operand type of exp.compr.
%26 = bitcast float %22 to <2 x i16>
%27 = bitcast float %25 to <2 x i16>
; NOTE(review): i32 0 / i32 15 are presumably the export target and the
; channel-enable mask, and the two trailing `i1 true` the done/vm flags --
; confirm against the AMDGPU intrinsic definitions.
call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 15, <2 x i16> %26, <2 x i16> %27, i1 true, i1 true) #4
ret void
}
; Function Attrs: nounwind readnone speculatable
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #3
; Function Attrs: nounwind
declare void @llvm.amdgcn.exp.compr.v2i16(i32, i32, <2 x i16>, <2 x i16>, i1, i1) #4
; wrapper: amdgpu_ps entry point that glues the shader parts together.
; It (1) breaks its arguments down into scalar 32-bit pieces, (2) rebuilds
; them into the types @main expects, (3) calls @main, (4) unpacks all 24
; fields of @main's return struct and (5) forwards them to @ps_epilog.
; Every conversion in steps 1-2 is paired with its inverse, so @main is
; called with the same bit patterns the wrapper itself received.
define amdgpu_ps void @wrapper([0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x float] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
main_body:
; --- Step 1a: split each 64-bit pointer argument (%0..%3) into two i32 halves.
%22 = ptrtoint [0 x <4 x i32>] addrspace(2)* %0 to i64
%23 = bitcast i64 %22 to <2 x i32>
%24 = extractelement <2 x i32> %23, i32 0
%25 = extractelement <2 x i32> %23, i32 1
%26 = ptrtoint [0 x <8 x i32>] addrspace(2)* %1 to i64
%27 = bitcast i64 %26 to <2 x i32>
%28 = extractelement <2 x i32> %27, i32 0
%29 = extractelement <2 x i32> %27, i32 1
%30 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%31 = bitcast i64 %30 to <2 x i32>
%32 = extractelement <2 x i32> %31, i32 0
%33 = extractelement <2 x i32> %31, i32 1
%34 = ptrtoint [0 x <8 x i32>] addrspace(2)* %3 to i64
%35 = bitcast i64 %34 to <2 x i32>
%36 = extractelement <2 x i32> %35, i32 0
%37 = extractelement <2 x i32> %35, i32 1
; --- Step 1b: re-type the scalar float %4 as i32.
%38 = bitcast float %4 to i32
; --- Step 1c: view the integer vector arguments %6..%12 as float vectors and
; pull out every element.
%39 = bitcast <2 x i32> %6 to <2 x float>
%40 = extractelement <2 x float> %39, i32 0
%41 = extractelement <2 x float> %39, i32 1
%42 = bitcast <2 x i32> %7 to <2 x float>
%43 = extractelement <2 x float> %42, i32 0
%44 = extractelement <2 x float> %42, i32 1
%45 = bitcast <2 x i32> %8 to <2 x float>
%46 = extractelement <2 x float> %45, i32 0
%47 = extractelement <2 x float> %45, i32 1
%48 = bitcast <3 x i32> %9 to <3 x float>
%49 = extractelement <3 x float> %48, i32 0
%50 = extractelement <3 x float> %48, i32 1
%51 = extractelement <3 x float> %48, i32 2
%52 = bitcast <2 x i32> %10 to <2 x float>
%53 = extractelement <2 x float> %52, i32 0
%54 = extractelement <2 x float> %52, i32 1
%55 = bitcast <2 x i32> %11 to <2 x float>
%56 = extractelement <2 x float> %55, i32 0
%57 = extractelement <2 x float> %55, i32 1
%58 = bitcast <2 x i32> %12 to <2 x float>
%59 = extractelement <2 x float> %58, i32 0
%60 = extractelement <2 x float> %58, i32 1
; --- Step 1d: re-type the scalar i32 arguments %18, %19 and %21 as float.
%61 = bitcast i32 %18 to float
%62 = bitcast i32 %19 to float
%63 = bitcast i32 %21 to float
; --- Step 2a: glue the i32 halves back into i64 and convert back to the four
; pointer types taken by @main.
%64 = insertelement <2 x i32> undef, i32 %24, i32 0
%65 = insertelement <2 x i32> %64, i32 %25, i32 1
%66 = bitcast <2 x i32> %65 to i64
%67 = inttoptr i64 %66 to [0 x <4 x i32>] addrspace(2)*
%68 = insertelement <2 x i32> undef, i32 %28, i32 0
%69 = insertelement <2 x i32> %68, i32 %29, i32 1
%70 = bitcast <2 x i32> %69 to i64
%71 = inttoptr i64 %70 to [0 x <8 x i32>] addrspace(2)*
%72 = insertelement <2 x i32> undef, i32 %32, i32 0
%73 = insertelement <2 x i32> %72, i32 %33, i32 1
%74 = bitcast <2 x i32> %73 to i64
%75 = inttoptr i64 %74 to [0 x float] addrspace(2)*
%76 = insertelement <2 x i32> undef, i32 %36, i32 0
%77 = insertelement <2 x i32> %76, i32 %37, i32 1
%78 = bitcast <2 x i32> %77 to i64
%79 = inttoptr i64 %78 to [0 x <8 x i32>] addrspace(2)*
; --- Step 2b: undo the %4 re-typing (i32 back to float).
%80 = bitcast i32 %38 to float
; --- Step 2c: rebuild the float vectors element by element and view them as
; integer vectors again, matching the types of %6..%12.
%81 = insertelement <2 x float> undef, float %40, i32 0
%82 = insertelement <2 x float> %81, float %41, i32 1
%83 = bitcast <2 x float> %82 to <2 x i32>
%84 = insertelement <2 x float> undef, float %43, i32 0
%85 = insertelement <2 x float> %84, float %44, i32 1
%86 = bitcast <2 x float> %85 to <2 x i32>
%87 = insertelement <2 x float> undef, float %46, i32 0
%88 = insertelement <2 x float> %87, float %47, i32 1
%89 = bitcast <2 x float> %88 to <2 x i32>
%90 = insertelement <3 x float> undef, float %49, i32 0
%91 = insertelement <3 x float> %90, float %50, i32 1
%92 = insertelement <3 x float> %91, float %51, i32 2
%93 = bitcast <3 x float> %92 to <3 x i32>
%94 = insertelement <2 x float> undef, float %53, i32 0
%95 = insertelement <2 x float> %94, float %54, i32 1
%96 = bitcast <2 x float> %95 to <2 x i32>
%97 = insertelement <2 x float> undef, float %56, i32 0
%98 = insertelement <2 x float> %97, float %57, i32 1
%99 = bitcast <2 x float> %98 to <2 x i32>
%100 = insertelement <2 x float> undef, float %59, i32 0
%101 = insertelement <2 x float> %100, float %60, i32 1
%102 = bitcast <2 x float> %101 to <2 x i32>
; --- Step 2d: undo the %18/%19/%21 re-typing (float back to i32).
%103 = bitcast float %61 to i32
%104 = bitcast float %62 to i32
%105 = bitcast float %63 to i32
; --- Step 3: call the main shader part. %5, %13..%17 and %20 are passed
; through directly; every other operand is a round-tripped value from above.
%106 = call <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([0 x <4 x i32>] addrspace(2)* %67, [0 x <8 x i32>] addrspace(2)* %71, [0 x float] addrspace(2)* %75, [0 x <8 x i32>] addrspace(2)* %79, float %80, i32 %5, <2 x i32> %83, <2 x i32> %86, <2 x i32> %89, <3 x i32> %93, <2 x i32> %96, <2 x i32> %99, <2 x i32> %102, float %13, float %14, float %15, float %16, float %17, i32 %103, i32 %104, float %20, i32 %105)
; --- Step 4: unpack all 24 fields of the returned struct
; (fields 0..8 are i32, fields 9..23 are float).
%107 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 0
%108 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 1
%109 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 2
%110 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 3
%111 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 4
%112 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 5
%113 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 6
%114 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 7
%115 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 8
%116 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 9
%117 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 10
%118 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 11
%119 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 12
%120 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 13
%121 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 14
%122 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 15
%123 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 16
%124 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 17
%125 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 18
%126 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 19
%127 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 20
%128 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 21
%129 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 22
%130 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 23
; --- Step 5a: pair fields 0..7 into four i64 values (the epilog's first four
; parameters are i64).
%131 = insertelement <2 x i32> undef, i32 %107, i32 0
%132 = insertelement <2 x i32> %131, i32 %108, i32 1
%133 = bitcast <2 x i32> %132 to i64
%134 = insertelement <2 x i32> undef, i32 %109, i32 0
%135 = insertelement <2 x i32> %134, i32 %110, i32 1
%136 = bitcast <2 x i32> %135 to i64
%137 = insertelement <2 x i32> undef, i32 %111, i32 0
%138 = insertelement <2 x i32> %137, i32 %112, i32 1
%139 = bitcast <2 x i32> %138 to i64
%140 = insertelement <2 x i32> undef, i32 %113, i32 0
%141 = insertelement <2 x i32> %140, i32 %114, i32 1
%142 = bitcast <2 x i32> %141 to i64
; --- Step 5b: field 8 is an i32 in the struct but a float in the epilog
; signature, so re-type it.
%143 = bitcast i32 %115 to float
; --- Step 5c: hand everything to the epilog; float fields 9..23 are passed
; in order as its trailing 15 float arguments.
call void @ps_epilog(i64 %133, i64 %136, i64 %139, i64 %142, float %143, float %116, float %117, float %118, float %119, float %120, float %121, float %122, float %123, float %124, float %125, float %126, float %127, float %128, float %129, float %130)
ret void
}
attributes #0 = { alwaysinline "no-signed-zeros-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { alwaysinline "InitialPSInputAddr"="16777215" "no-signed-zeros-fp-math"="true" }
attributes #3 = { nounwind readnone speculatable }
attributes #4 = { nounwind }
attributes #5 = { "no-signed-zeros-fp-math"="true" }
radeonsi: Compiling shader 4
SHADER KEY
part.ps.prolog.color_two_side = 0
part.ps.prolog.flatshade_colors = 0
part.ps.prolog.poly_stipple = 0
part.ps.prolog.force_persp_sample_interp = 0
part.ps.prolog.force_linear_sample_interp = 0
part.ps.prolog.force_persp_center_interp = 0
part.ps.prolog.force_linear_center_interp = 0
part.ps.prolog.bc_optimize_for_persp = 0
part.ps.prolog.bc_optimize_for_linear = 0
part.ps.epilog.spi_shader_col_format = 0x4
part.ps.epilog.color_is_int8 = 0x0
part.ps.epilog.color_is_int10 = 0x0
part.ps.epilog.last_cbuf = 0
part.ps.epilog.alpha_func = 7
part.ps.epilog.alpha_to_one = 0
part.ps.epilog.poly_line_smoothing = 0
part.ps.epilog.clamp_color = 0
Pixel Shader:
Shader main disassembly:
wrapper:
BB0_0:
s_mov_b32 s7, 0x27fac ; BE8703FF 00027FAC
s_movk_i32 s6, 0x60 ; B0060060
v_mov_b32_e32 v2, 0x4f800000 ; 7E0402FF 4F800000
v_mov_b32_e32 v3, 0x5f7ffffc ; 7E0602FF 5F7FFFFC
v_mov_b32_e32 v4, 0x2f800000 ; 7E0802FF 2F800000
v_mov_b32_e32 v5, 0xcf800000 ; 7E0A02FF CF800000
s_buffer_load_dwordx4 s[8:11], s[4:7], 0x0 ; C2840500
s_buffer_load_dwordx2 s[12:13], s[4:7], 0x4 ; C2460504
s_buffer_load_dwordx4 s[16:19], s[4:7], 0x8 ; C2880508
v_mov_b32_e32 v10, 0 ; 7E140280
s_waitcnt lgkmcnt(0) ; BF8C007F
s_ashr_i32 s14, s9, 31 ; 910E9F09
s_add_u32 s0, s8, s14 ; 80000E08
s_mov_b32 s15, s14 ; BE8F030E
s_addc_u32 s1, s9, s14 ; 82010E09
s_xor_b64 s[2:3], s[0:1], s[14:15] ; 89820E00
s_ashr_i32 s0, s17, 31 ; 91009F11
s_add_u32 s8, s16, s0 ; 80080010
s_mov_b32 s1, s0 ; BE810300
s_addc_u32 s9, s17, s0 ; 82090011
s_xor_b64 s[8:9], s[8:9], s[0:1] ; 89880008
v_cvt_f32_u32_e32 v0, s8 ; 7E000C08
v_cvt_f32_u32_e32 v1, s9 ; 7E020C09
s_sub_u32 s15, 0, s8 ; 808F0880
s_subb_u32 s16, 0, s9 ; 82900980
v_mac_f32_e32 v0, v1, v2 ; 3E000501
v_rcp_f32_e32 v0, v0 ; 7E005500
v_mul_f32_e32 v0, v0, v3 ; 10000700
v_mul_f32_e32 v1, v0, v4 ; 10020900
v_trunc_f32_e32 v1, v1 ; 7E024301
v_mac_f32_e32 v0, v1, v5 ; 3E000B01
v_cvt_u32_f32_e32 v0, v0 ; 7E000F00
v_cvt_u32_f32_e32 v1, v1 ; 7E020F01
v_mul_hi_u32 v6, s15, v0 ; D2D40006 0002000F
v_mul_lo_i32 v7, s15, v1 ; D2D60007 0002020F
v_mul_lo_i32 v8, s16, v0 ; D2D60008 00020010
v_add_i32_e32 v6, vcc, v6, v7 ; 4A0C0F06
v_mul_lo_i32 v7, s15, v0 ; D2D60007 0002000F
v_add_i32_e32 v6, vcc, v6, v8 ; 4A0C1106
v_mul_lo_i32 v8, v0, v6 ; D2D60008 00020D00
v_mul_hi_u32 v11, v0, v6 ; D2D4000B 00020D00
v_mul_hi_u32 v9, v0, v7 ; D2D40009 00020F00
v_mul_hi_u32 v12, v1, v6 ; D2D4000C 00020D01
v_mul_lo_i32 v6, v1, v6 ; D2D60006 00020D01
v_add_i32_e32 v8, vcc, v9, v8 ; 4A101109
v_addc_u32_e32 v9, vcc, 0, v11, vcc ; 50121680
v_mul_lo_i32 v11, v1, v7 ; D2D6000B 00020F01
v_mul_hi_u32 v7, v1, v7 ; D2D40007 00020F01
v_add_i32_e32 v8, vcc, v11, v8 ; 4A10110B
v_addc_u32_e32 v7, vcc, v9, v7, vcc ; 500E0F09
v_addc_u32_e32 v8, vcc, v12, v10, vcc ; 5010150C
v_add_i32_e32 v6, vcc, v7, v6 ; 4A0C0D07
v_add_i32_e64 v0, s[0:1], v0, v6 ; D24A0000 00020D00
v_addc_u32_e32 v7, vcc, 0, v8, vcc ; 500E1080
v_addc_u32_e64 v6, vcc, v1, v7, s[0:1] ; D2506A06 00020F01
v_mul_lo_i32 v8, s15, v6 ; D2D60008 00020C0F
v_mul_hi_u32 v9, s15, v0 ; D2D40009 0002000F
v_mul_lo_i32 v12, s16, v0 ; D2D6000C 00020010
v_mov_b32_e32 v11, s3 ; 7E160203
v_add_i32_e32 v8, vcc, v9, v8 ; 4A101109
v_mul_lo_i32 v9, s15, v0 ; D2D60009 0002000F
v_add_i32_e32 v8, vcc, v12, v8 ; 4A10110C
v_mul_lo_i32 v14, v0, v8 ; D2D6000E 00021100
v_mul_hi_u32 v16, v0, v8 ; D2D40010 00021100
v_mul_hi_u32 v15, v0, v9 ; D2D4000F 00021300
v_mul_hi_u32 v13, v6, v9 ; D2D4000D 00021306
v_mul_lo_i32 v9, v6, v9 ; D2D60009 00021306
v_mul_hi_u32 v12, v6, v8 ; D2D4000C 00021106
v_add_i32_e32 v14, vcc, v15, v14 ; 4A1C1D0F
v_addc_u32_e32 v15, vcc, 0, v16, vcc ; 501E2080
v_mul_lo_i32 v6, v6, v8 ; D2D60006 00021106
v_add_i32_e32 v9, vcc, v9, v14 ; 4A121D09
v_addc_u32_e32 v9, vcc, v15, v13, vcc ; 50121B0F
v_addc_u32_e32 v8, vcc, v12, v10, vcc ; 5010150C
v_add_i32_e32 v6, vcc, v9, v6 ; 4A0C0D09
v_addc_u32_e32 v8, vcc, 0, v8, vcc ; 50101080
v_add_i32_e32 v1, vcc, v1, v7 ; 4A020F01
v_addc_u32_e64 v1, vcc, v1, v8, s[0:1] ; D2506A01 00021101
v_add_i32_e32 v0, vcc, v0, v6 ; 4A000D00
v_addc_u32_e32 v1, vcc, 0, v1, vcc ; 50020280
v_mul_lo_i32 v6, s2, v1 ; D2D60006 00020202
v_mul_hi_u32 v7, s2, v0 ; D2D40007 00020002
v_mul_hi_u32 v9, s2, v1 ; D2D40009 00020202
v_mul_hi_u32 v12, s3, v1 ; D2D4000C 00020203
v_mul_lo_i32 v1, s3, v1 ; D2D60001 00020203
v_add_i32_e32 v6, vcc, v7, v6 ; 4A0C0D07
v_addc_u32_e32 v7, vcc, 0, v9, vcc ; 500E1280
v_mul_lo_i32 v9, s3, v0 ; D2D60009 00020003
v_mul_hi_u32 v0, s3, v0 ; D2D40000 00020003
v_mov_b32_e32 v8, s14 ; 7E10020E
v_add_i32_e32 v6, vcc, v9, v6 ; 4A0C0D09
v_addc_u32_e32 v0, vcc, v7, v0, vcc ; 50000107
v_addc_u32_e32 v6, vcc, v12, v10, vcc ; 500C150C
v_add_i32_e32 v0, vcc, v0, v1 ; 4A000300
v_addc_u32_e32 v1, vcc, 0, v6, vcc ; 50020C80
v_mul_lo_i32 v1, s8, v1 ; D2D60001 00020208
v_mul_hi_u32 v6, s8, v0 ; D2D40006 00020008
v_mul_lo_i32 v9, s9, v0 ; D2D60009 00020009
v_mul_lo_i32 v0, s8, v0 ; D2D60000 00020008
v_mov_b32_e32 v7, s9 ; 7E0E0209
v_add_i32_e32 v1, vcc, v6, v1 ; 4A020306
v_add_i32_e32 v1, vcc, v1, v9 ; 4A021301
v_sub_i32_e32 v6, vcc, s3, v1 ; 4C0C0203
v_sub_i32_e32 v0, vcc, s2, v0 ; 4C000002
v_subb_u32_e64 v6, s[0:1], v6, v7, vcc ; D2520006 01AA0F06
v_subrev_i32_e64 v9, s[0:1], s8, v0 ; D24E0009 00020008
v_subb_u32_e64 v7, s[2:3], v6, v7, s[0:1] ; D2520207 00020F06
v_subb_u32_e64 v6, s[0:1], v6, 0, s[0:1] ; D2520006 00010106
v_cmp_le_u32_e64 s[0:1], s9, v6 ; D1860000 00020C09
v_cndmask_b32_e64 v12, 0, -1, s[0:1] ; D200000C 00018280
v_cmp_le_u32_e64 s[0:1], s8, v9 ; D1860000 00021208
s_ashr_i32 s2, s19, 31 ; 91029F13
v_cndmask_b32_e64 v13, 0, -1, s[0:1] ; D200000D 00018280
v_cmp_eq_u32_e64 s[0:1], s9, v6 ; D1840000 00020C09
v_cndmask_b32_e64 v12, v12, v13, s[0:1] ; D200000C 00021B0C
v_subrev_i32_e64 v13, s[0:1], s8, v9 ; D24E000D 00021208
s_add_u32 s16, s18, s2 ; 80100212
v_subb_u32_e64 v7, s[0:1], v7, 0, s[0:1] ; D2520007 00010107
s_mov_b32 s3, s2 ; BE830302
s_addc_u32 s17, s19, s2 ; 82110213
v_cmp_ne_u32_e64 s[0:1], 0, v12 ; D18A0000 00021880
s_xor_b64 s[20:21], s[16:17], s[2:3] ; 89940210
v_subb_u32_e32 v1, vcc, v11, v1, vcc ; 5202030B
v_cndmask_b32_e64 v6, v6, v7, s[0:1] ; D2000006 00020F06
v_cvt_f32_u32_e32 v7, s20 ; 7E0E0C14
v_cvt_f32_u32_e32 v11, s21 ; 7E160C15
v_cmp_le_u32_e32 vcc, s9, v1 ; 7D860209
v_cndmask_b32_e64 v12, 0, -1, vcc ; D200000C 01A98280
v_cmp_le_u32_e32 vcc, s8, v0 ; 7D860008
v_mac_f32_e32 v7, v11, v2 ; 3E0E050B
v_rcp_f32_e32 v7, v7 ; 7E0E5507
v_cndmask_b32_e64 v14, 0, -1, vcc ; D200000E 01A98280
v_cmp_eq_u32_e32 vcc, s9, v1 ; 7D840209
v_cndmask_b32_e32 v11, v12, v14, vcc ; 00161D0C
v_cmp_ne_u32_e64 s[2:3], 0, v11 ; D18A0002 00021680
v_cndmask_b32_e64 v1, v1, v6, s[2:3] ; D2000001 000A0D01
v_mul_f32_e32 v6, v7, v3 ; 100C0707
v_mul_f32_e32 v7, v6, v4 ; 100E0906
v_trunc_f32_e32 v7, v7 ; 7E0E4307
v_mac_f32_e32 v6, v7, v5 ; 3E0C0B07
v_cvt_u32_f32_e32 v6, v6 ; 7E0C0F06
v_cvt_u32_f32_e32 v7, v7 ; 7E0E0F07
s_sub_u32 s8, 0, s20 ; 80881480
s_subb_u32 s9, 0, s21 ; 82891580
v_mul_hi_u32 v11, s8, v6 ; D2D4000B 00020C08
v_mul_lo_i32 v12, s8, v7 ; D2D6000C 00020E08
v_cndmask_b32_e64 v9, v9, v13, s[0:1] ; D2000009 00021B09
v_mul_lo_i32 v13, s9, v6 ; D2D6000D 00020C09
v_cndmask_b32_e64 v0, v0, v9, s[2:3] ; D2000000 000A1300
v_add_i32_e32 v11, vcc, v11, v12 ; 4A16190B
v_mul_lo_i32 v12, s8, v6 ; D2D6000C 00020C08
v_add_i32_e32 v11, vcc, v11, v13 ; 4A161B0B
v_mul_lo_i32 v13, v6, v11 ; D2D6000D 00021706
v_mul_hi_u32 v9, v6, v11 ; D2D40009 00021706
v_mul_hi_u32 v14, v6, v12 ; D2D4000E 00021906
v_mul_hi_u32 v15, v7, v11 ; D2D4000F 00021707
v_mul_lo_i32 v11, v7, v11 ; D2D6000B 00021707
v_xor_b32_e32 v0, s14, v0 ; 3A00000E
v_add_i32_e32 v13, vcc, v14, v13 ; 4A1A1B0E
v_mul_lo_i32 v14, v7, v12 ; D2D6000E 00021907
v_mul_hi_u32 v12, v7, v12 ; D2D4000C 00021907
v_addc_u32_e32 v9, vcc, 0, v9, vcc ; 50121280
v_xor_b32_e32 v1, s14, v1 ; 3A02020E
v_add_i32_e32 v13, vcc, v14, v13 ; 4A1A1B0E
v_addc_u32_e32 v9, vcc, v9, v12, vcc ; 50121909
v_addc_u32_e32 v12, vcc, v15, v10, vcc ; 5018150F
v_add_i32_e32 v9, vcc, v9, v11 ; 4A121709
v_add_i32_e64 v6, s[0:1], v6, v9 ; D24A0006 00021306
v_addc_u32_e32 v11, vcc, 0, v12, vcc ; 50161880
v_addc_u32_e64 v9, vcc, v7, v11, s[0:1] ; D2506A09 00021707
v_mul_lo_i32 v12, s8, v9 ; D2D6000C 00021208
v_mul_hi_u32 v13, s8, v6 ; D2D4000D 00020C08
v_mul_lo_i32 v14, s9, v6 ; D2D6000E 00020C09
v_add_i32_e32 v12, vcc, v13, v12 ; 4A18190D
v_mul_lo_i32 v13, s8, v6 ; D2D6000D 00020C08
v_add_i32_e32 v12, vcc, v14, v12 ; 4A18190E
v_mul_lo_i32 v16, v6, v12 ; D2D60010 00021906
v_mul_hi_u32 v18, v6, v12 ; D2D40012 00021906
v_mul_hi_u32 v17, v6, v13 ; D2D40011 00021B06
v_mul_hi_u32 v15, v9, v13 ; D2D4000F 00021B09
v_mul_lo_i32 v13, v9, v13 ; D2D6000D 00021B09
v_mul_hi_u32 v14, v9, v12 ; D2D4000E 00021909
v_add_i32_e32 v16, vcc, v17, v16 ; 4A202111
v_addc_u32_e32 v17, vcc, 0, v18, vcc ; 50222480
v_mul_lo_i32 v9, v9, v12 ; D2D60009 00021909
v_add_i32_e32 v13, vcc, v13, v16 ; 4A1A210D
v_addc_u32_e32 v13, vcc, v17, v15, vcc ; 501A1F11
v_addc_u32_e32 v12, vcc, v14, v10, vcc ; 5018150E
v_add_i32_e32 v9, vcc, v13, v9 ; 4A12130D
v_addc_u32_e32 v12, vcc, 0, v12, vcc ; 50181880
v_add_i32_e32 v7, vcc, v7, v11 ; 4A0E1707
s_ashr_i32 s8, s11, 31 ; 91089F0B
v_addc_u32_e64 v7, vcc, v7, v12, s[0:1] ; D2506A07 00021907
s_add_u32 s0, s10, s8 ; 8000080A
v_add_i32_e32 v6, vcc, v6, v9 ; 4A0C1306
s_mov_b32 s9, s8 ; BE890308
s_addc_u32 s1, s11, s8 ; 8201080B
s_xor_b64 s[10:11], s[0:1], s[8:9] ; 898A0800
v_addc_u32_e32 v7, vcc, 0, v7, vcc ; 500E0E80
v_mul_lo_i32 v9, s10, v7 ; D2D60009 00020E0A
v_mul_hi_u32 v11, s10, v6 ; D2D4000B 00020C0A
v_mul_hi_u32 v12, s10, v7 ; D2D4000C 00020E0A
v_mul_hi_u32 v13, s11, v7 ; D2D4000D 00020E0B
v_mul_lo_i32 v7, s11, v7 ; D2D60007 00020E0B
v_add_i32_e32 v9, vcc, v11, v9 ; 4A12130B
v_addc_u32_e32 v11, vcc, 0, v12, vcc ; 50161880
v_mul_lo_i32 v12, s11, v6 ; D2D6000C 00020C0B
v_mul_hi_u32 v6, s11, v6 ; D2D40006 00020C0B
v_add_i32_e32 v9, vcc, v12, v9 ; 4A12130C
v_addc_u32_e32 v6, vcc, v11, v6, vcc ; 500C0D0B
v_addc_u32_e32 v9, vcc, v13, v10, vcc ; 5012150D
v_add_i32_e32 v6, vcc, v6, v7 ; 4A0C0F06
v_addc_u32_e32 v7, vcc, 0, v9, vcc ; 500E1280
v_subrev_i32_e32 v0, vcc, s14, v0 ; 4E00000E
v_mul_lo_i32 v7, s20, v7 ; D2D60007 00020E14
v_mul_hi_u32 v9, s20, v6 ; D2D40009 00020C14
v_subb_u32_e32 v1, vcc, v1, v8, vcc ; 52021101
v_mul_lo_i32 v8, s21, v6 ; D2D60008 00020C15
v_mul_lo_i32 v6, s20, v6 ; D2D60006 00020C14
v_add_i32_e32 v7, vcc, v9, v7 ; 4A0E0F09
v_mov_b32_e32 v9, s21 ; 7E120215
v_add_i32_e32 v7, vcc, v7, v8 ; 4A0E1107
v_sub_i32_e32 v8, vcc, s11, v7 ; 4C100E0B
v_sub_i32_e32 v6, vcc, s10, v6 ; 4C0C0C0A
v_subb_u32_e64 v8, s[0:1], v8, v9, vcc ; D2520008 01AA1308
v_subrev_i32_e64 v11, s[0:1], s20, v6 ; D24E000B 00020C14
v_subb_u32_e64 v9, s[2:3], v8, v9, s[0:1] ; D2520209 00021308
v_subb_u32_e64 v8, s[0:1], v8, 0, s[0:1] ; D2520008 00010108
s_buffer_load_dwordx2 s[2:3], s[4:7], 0xc ; C241050C
v_cmp_le_u32_e64 s[0:1], s21, v8 ; D1860000 00021015
v_cndmask_b32_e64 v12, 0, -1, s[0:1] ; D200000C 00018280
v_cmp_le_u32_e64 s[0:1], s20, v11 ; D1860000 00021614
v_cndmask_b32_e64 v13, 0, -1, s[0:1] ; D200000D 00018280
v_cmp_eq_u32_e64 s[0:1], s21, v8 ; D1840000 00021015
v_cndmask_b32_e64 v12, v12, v13, s[0:1] ; D200000C 00021B0C
v_subrev_i32_e64 v13, s[0:1], s20, v11 ; D24E000D 00021614
v_subb_u32_e64 v9, s[0:1], v9, 0, s[0:1] ; D2520009 00010109
s_buffer_load_dwordx4 s[16:19], s[4:7], 0x10 ; C2880510
s_waitcnt lgkmcnt(0) ; BF8C007F
s_ashr_i32 s10, s3, 31 ; 910A9F03
s_add_u32 s2, s2, s10 ; 80020A02
v_cmp_ne_u32_e64 s[0:1], 0, v12 ; D18A0000 00021880
v_cndmask_b32_e64 v8, v8, v9, s[0:1] ; D2000008 00021308
v_mov_b32_e32 v9, s11 ; 7E12020B
s_mov_b32 s11, s10 ; BE8B030A
s_addc_u32 s3, s3, s10 ; 82030A03
s_xor_b64 s[10:11], s[2:3], s[10:11] ; 898A0A02
v_cvt_f32_u32_e32 v12, s10 ; 7E180C0A
v_cvt_f32_u32_e32 v14, s11 ; 7E1C0C0B
v_subb_u32_e32 v7, vcc, v9, v7, vcc ; 520E0F09
v_cmp_le_u32_e32 vcc, s21, v7 ; 7D860E15
v_cndmask_b32_e64 v9, 0, -1, vcc ; D2000009 01A98280
v_mac_f32_e32 v12, v14, v2 ; 3E18050E
v_rcp_f32_e32 v2, v12 ; 7E04550C
v_cmp_le_u32_e32 vcc, s20, v6 ; 7D860C14
v_cndmask_b32_e64 v15, 0, -1, vcc ; D200000F 01A98280
v_cmp_eq_u32_e32 vcc, s21, v7 ; 7D840E15
v_mul_f32_e32 v2, v2, v3 ; 10040702
v_mul_f32_e32 v3, v2, v4 ; 10060902
v_trunc_f32_e32 v3, v3 ; 7E064303
v_mac_f32_e32 v2, v3, v5 ; 3E040B03
v_cvt_u32_f32_e32 v2, v2 ; 7E040F02
v_cvt_u32_f32_e32 v3, v3 ; 7E060F03
v_cndmask_b32_e32 v9, v9, v15, vcc ; 00121F09
v_cmp_ne_u32_e32 vcc, 0, v9 ; 7D8A1280
v_cndmask_b32_e32 v7, v7, v8, vcc ; 000E1107
s_sub_u32 s2, 0, s10 ; 80820A80
v_cndmask_b32_e64 v8, v11, v13, s[0:1] ; D2000008 00021B0B
v_cndmask_b32_e32 v4, v6, v8, vcc ; 00081106
v_mul_hi_u32 v5, s2, v2 ; D2D40005 00020402
v_mul_lo_i32 v6, s2, v3 ; D2D60006 00020602
s_subb_u32 s3, 0, s11 ; 82830B80
v_mul_lo_i32 v8, s3, v2 ; D2D60008 00020403
s_ashr_i32 s14, s13, 31 ; 910E9F0D
v_add_i32_e32 v5, vcc, v5, v6 ; 4A0A0D05
v_mul_lo_i32 v6, s2, v2 ; D2D60006 00020402
v_add_i32_e32 v5, vcc, v5, v8 ; 4A0A1105
v_mul_lo_i32 v8, v2, v5 ; D2D60008 00020B02
v_mul_hi_u32 v11, v2, v5 ; D2D4000B 00020B02
v_mul_hi_u32 v9, v2, v6 ; D2D40009 00020D02
v_mul_hi_u32 v12, v3, v5 ; D2D4000C 00020B03
v_mul_lo_i32 v5, v3, v5 ; D2D60005 00020B03
s_mov_b32 s15, s14 ; BE8F030E
v_add_i32_e32 v8, vcc, v9, v8 ; 4A101109
v_addc_u32_e32 v9, vcc, 0, v11, vcc ; 50121680
v_mul_lo_i32 v11, v3, v6 ; D2D6000B 00020D03
v_mul_hi_u32 v6, v3, v6 ; D2D40006 00020D03
v_xor_b32_e32 v4, s8, v4 ; 3A080808
v_xor_b32_e32 v7, s8, v7 ; 3A0E0E08
v_add_i32_e32 v8, vcc, v11, v8 ; 4A10110B
v_addc_u32_e32 v6, vcc, v9, v6, vcc ; 500C0D09
v_addc_u32_e32 v8, vcc, v12, v10, vcc ; 5010150C
v_add_i32_e32 v5, vcc, v6, v5 ; 4A0A0B06
v_add_i32_e64 v2, s[0:1], v2, v5 ; D24A0002 00020B02
v_addc_u32_e32 v6, vcc, 0, v8, vcc ; 500C1080
v_addc_u32_e64 v5, vcc, v3, v6, s[0:1] ; D2506A05 00020D03
v_mul_lo_i32 v8, s2, v5 ; D2D60008 00020A02
v_mul_hi_u32 v9, s2, v2 ; D2D40009 00020402
v_mul_lo_i32 v11, s3, v2 ; D2D6000B 00020403
v_add_i32_e32 v8, vcc, v9, v8 ; 4A101109
v_mul_lo_i32 v9, s2, v2 ; D2D60009 00020402
v_add_i32_e32 v8, vcc, v11, v8 ; 4A10110B
v_mul_lo_i32 v13, v2, v8 ; D2D6000D 00021102
v_mul_hi_u32 v15, v2, v8 ; D2D4000F 00021102
v_mul_hi_u32 v14, v2, v9 ; D2D4000E 00021302
v_mul_hi_u32 v12, v5, v9 ; D2D4000C 00021305
v_mul_lo_i32 v9, v5, v9 ; D2D60009 00021305
v_mul_hi_u32 v11, v5, v8 ; D2D4000B 00021105
v_add_i32_e32 v13, vcc, v14, v13 ; 4A1A1B0E
v_addc_u32_e32 v14, vcc, 0, v15, vcc ; 501C1E80
v_mul_lo_i32 v5, v5, v8 ; D2D60005 00021105
v_add_i32_e32 v9, vcc, v9, v13 ; 4A121B09
v_addc_u32_e32 v9, vcc, v14, v12, vcc ; 5012190E
v_addc_u32_e32 v8, vcc, v11, v10, vcc ; 5010150B
v_add_i32_e32 v5, vcc, v9, v5 ; 4A0A0B09
v_addc_u32_e32 v8, vcc, 0, v8, vcc ; 50101080
v_add_i32_e32 v3, vcc, v3, v6 ; 4A060D03
v_addc_u32_e64 v3, vcc, v3, v8, s[0:1] ; D2506A03 00021103
s_add_u32 s0, s12, s14 ; 80000E0C
s_addc_u32 s1, s13, s14 ; 82010E0D
v_add_i32_e32 v2, vcc, v2, v5 ; 4A040B02
s_xor_b64 s[12:13], s[0:1], s[14:15] ; 898C0E00
v_addc_u32_e32 v3, vcc, 0, v3, vcc ; 50060680
v_mul_lo_i32 v5, s12, v3 ; D2D60005 0002060C
v_mul_hi_u32 v6, s12, v2 ; D2D40006 0002040C
v_mul_hi_u32 v9, s12, v3 ; D2D40009 0002060C
v_mul_hi_u32 v11, s13, v3 ; D2D4000B 0002060D
v_mul_lo_i32 v3, s13, v3 ; D2D60003 0002060D
v_add_i32_e32 v5, vcc, v6, v5 ; 4A0A0B06
v_addc_u32_e32 v6, vcc, 0, v9, vcc ; 500C1280
v_mul_lo_i32 v9, s13, v2 ; D2D60009 0002040D
v_mul_hi_u32 v2, s13, v2 ; D2D40002 0002040D
v_mov_b32_e32 v8, s8 ; 7E100208
v_add_i32_e32 v5, vcc, v9, v5 ; 4A0A0B09
v_addc_u32_e32 v2, vcc, v6, v2, vcc ; 50040506
v_addc_u32_e32 v5, vcc, v11, v10, vcc ; 500A150B
v_add_i32_e32 v6, vcc, v2, v3 ; 4A0C0702
v_addc_u32_e32 v2, vcc, 0, v5, vcc ; 50040A80
v_mul_lo_i32 v5, s10, v2 ; D2D60005 0002040A
v_mul_hi_u32 v9, s10, v6 ; D2D40009 00020C0A
v_subrev_i32_e32 v2, vcc, s8, v4 ; 4E040808
v_mul_lo_i32 v4, s11, v6 ; D2D60004 00020C0B
v_subb_u32_e32 v3, vcc, v7, v8, vcc ; 52061107
v_add_i32_e32 v5, vcc, v9, v5 ; 4A0A0B09
v_mov_b32_e32 v7, s11 ; 7E0E020B
v_add_i32_e32 v4, vcc, v5, v4 ; 4A080905
v_mul_lo_i32 v5, s10, v6 ; D2D60005 00020C0A
v_sub_i32_e32 v6, vcc, s13, v4 ; 4C0C080D
v_sub_i32_e32 v5, vcc, s12, v5 ; 4C0A0A0C
v_subb_u32_e64 v6, s[0:1], v6, v7, vcc ; D2520006 01AA0F06
v_subrev_i32_e64 v8, s[0:1], s10, v5 ; D24E0008 00020A0A
v_subb_u32_e64 v7, s[2:3], v6, v7, s[0:1] ; D2520207 00020F06
v_subb_u32_e64 v6, s[0:1], v6, 0, s[0:1] ; D2520006 00010106
v_cmp_le_u32_e64 s[0:1], s11, v6 ; D1860000 00020C0B
v_cndmask_b32_e64 v9, 0, -1, s[0:1] ; D2000009 00018280
v_cmp_le_u32_e64 s[0:1], s10, v8 ; D1860000 0002100A
v_cndmask_b32_e64 v10, 0, -1, s[0:1] ; D200000A 00018280
v_cmp_eq_u32_e64 s[0:1], s11, v6 ; D1840000 00020C0B
v_cndmask_b32_e64 v9, v9, v10, s[0:1] ; D2000009 00021509
v_subrev_i32_e64 v10, s[0:1], s10, v8 ; D24E000A 0002100A
v_subb_u32_e64 v7, s[0:1], v7, 0, s[0:1] ; D2520007 00010107
v_cmp_ne_u32_e64 s[0:1], 0, v9 ; D18A0000 00021280
v_cndmask_b32_e64 v6, v6, v7, s[0:1] ; D2000006 00020F06
v_mov_b32_e32 v7, s13 ; 7E0E020D
v_subb_u32_e32 v4, vcc, v7, v4, vcc ; 52080907
v_cmp_le_u32_e32 vcc, s11, v4 ; 7D86080B
v_cndmask_b32_e64 v7, 0, -1, vcc ; D2000007 01A98280
v_cmp_le_u32_e32 vcc, s10, v5 ; 7D860A0A
v_cndmask_b32_e64 v9, 0, -1, vcc ; D2000009 01A98280
v_cmp_eq_u32_e32 vcc, s11, v4 ; 7D84080B
v_cndmask_b32_e32 v7, v7, v9, vcc ; 000E1307
v_cmp_ne_u32_e32 vcc, 0, v7 ; 7D8A0E80
v_cndmask_b32_e32 v4, v4, v6, vcc ; 00080D04
v_cndmask_b32_e64 v6, v8, v10, s[0:1] ; D2000006 00021508
s_buffer_load_dwordx2 s[0:1], s[4:7], 0x14 ; C2400514
v_cndmask_b32_e32 v5, v5, v6, vcc ; 000A0D05
v_xor_b32_e32 v5, s14, v5 ; 3A0A0A0E
v_xor_b32_e32 v6, s14, v4 ; 3A0C080E
v_mov_b32_e32 v7, s14 ; 7E0E020E
v_subrev_i32_e32 v4, vcc, s14, v5 ; 4E080A0E
v_subb_u32_e32 v5, vcc, v6, v7, vcc ; 520A0F06
v_cmp_eq_u64_e32 vcc, s[18:19], v[2:3] ; 7DC40412
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_eq_u64_e64 s[0:1], s[0:1], v[4:5] ; D1C40000 00020800
v_cmp_eq_u64_e64 s[2:3], s[16:17], v[0:1] ; D1C40002 00020010
s_and_b64 s[0:1], vcc, s[0:1] ; 8780006A
s_and_b64 s[0:1], s[2:3], s[0:1] ; 87800002
v_cndmask_b32_e64 v0, 1.0, 0, s[0:1] ; D2000000 000100F2
v_cndmask_b32_e64 v1, 0, 1.0, s[0:1] ; D2000001 0001E480
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_bfrev_b32_e32 v1, 60 ; 7E0270BC
exp mrt0 v0, v0, v1, v1 done compr vm ; F8001C0F 00000100
s_endpgm ; BF810000
*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0x0001
SPI_PS_INPUT_ENA = 0x0001
*** SHADER STATS ***
SGPRS: 24
VGPRS: 20
Spilled SGPRs: 0
Spilled VGPRs: 0
Private memory VGPRs: 0
Code Size: 2260 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************
More information about the llvm-commits mailing list