[llvm] r317753 - AMDGPU: Merge BUFFER_LOAD_DWORD_OFFSET into x2, x4

Michel Dänzer via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 9 08:11:46 PST 2017


Hi Marek,


On 09/11/17 02:52 AM, Marek Olsak via llvm-commits wrote:
> Author: mareko
> Date: Wed Nov  8 17:52:36 2017
> New Revision: 317753
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=317753&view=rev
> Log:
> AMDGPU: Merge BUFFER_LOAD_DWORD_OFFSET into x2, x4

This change broke a bunch of
spec@glsl-1.{1,2}0@execution@variable-indexing@vs-varying-array-mat4-index-*
tests on my Tonga, e.g.

PIGLIT TEST: 73 - vs-varying-array-mat4-index-col-wr
Probe color at (10,10)
  Expected: 0.000000 1.000000 0.000000
  Observed: 1.000000 0.000000 0.000000
Test failure on line 99
Probe color at (25,10)
  Expected: 0.000000 1.000000 0.000000
  Observed: 1.000000 0.000000 0.000000
Test failure on line 107
Probe color at (40,10)
  Expected: 0.000000 1.000000 0.000000
  Observed: 1.000000 0.000000 0.000000
Test failure on line 115
[...]


These tests consistently pass without this change and fail with it.


I'm attaching the dump of an affected shader with and without this
change. Let me know if you need more information.


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
-------------- next part --------------
VERT
PROPERTY NEXT_SHADER FRAG
DCL IN[0]
DCL OUT[0], POSITION
DCL OUT[1..12], ARRAY(1), GENERIC[0]
DCL CONST[0][0..10]
DCL TEMP[0..2], LOCAL
DCL TEMP[3..6], ARRAY(1), LOCAL
DCL TEMP[7..10], ARRAY(2), LOCAL
DCL TEMP[11..14], ARRAY(3), LOCAL
DCL TEMP[15..22], LOCAL
IMM[0] FLT32 {    0.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {4, 0, 1, 2}
IMM[2] INT32 {3, 0, 0, 0}
  0: MUL TEMP[0], CONST[0][7], IN[0].xxxx
  1: MAD TEMP[1], CONST[0][8], IN[0].yyyy, TEMP[0]
  2: MAD TEMP[2], CONST[0][9], IN[0].zzzz, TEMP[1]
  3: MAD OUT[0], CONST[0][10], IN[0].wwww, TEMP[2]
  4: MOV TEMP[3], IMM[0].xxxx
  5: MOV TEMP[4], IMM[0].xxxx
  6: MOV TEMP[5], IMM[0].xxxx
  7: MOV TEMP[6], IMM[0].xxxx
  8: MOV OUT[1], TEMP[3]
  9: MOV OUT[2], TEMP[4]
 10: MOV OUT[3], TEMP[5]
 11: MOV OUT[4], TEMP[6]
 12: MOV TEMP[7], IMM[0].xxxx
 13: MOV TEMP[8], IMM[0].xxxx
 14: MOV TEMP[9], IMM[0].xxxx
 15: MOV TEMP[10], IMM[0].xxxx
 16: MOV OUT[5], TEMP[7]
 17: MOV OUT[6], TEMP[8]
 18: MOV OUT[7], TEMP[9]
 19: MOV OUT[8], TEMP[10]
 20: MOV TEMP[11], IMM[0].xxxx
 21: MOV TEMP[12], IMM[0].xxxx
 22: MOV TEMP[13], IMM[0].xxxx
 23: MOV TEMP[14], IMM[0].xxxx
 24: MOV OUT[9], TEMP[11]
 25: MOV OUT[10], TEMP[12]
 26: MOV OUT[11], TEMP[13]
 27: MOV OUT[12], TEMP[14]
 28: UMUL TEMP[15].x, CONST[0][4].xxxx, IMM[1].xxxx
 29: MOV OUT[TEMP[15].x+1](1), CONST[0][0]
 30: MOV OUT[TEMP[15].x+2](1), CONST[0][1]
 31: MOV OUT[TEMP[15].x+3](1), CONST[0][2]
 32: MOV OUT[TEMP[15].x+4](1), CONST[0][3]
 33: UMUL TEMP[16].x, CONST[0][4].xxxx, IMM[1].xxxx
 34: MOV TEMP[17], OUT[TEMP[16].x+2](1)
 35: USEQ TEMP[18].x, CONST[0][5].xxxx, IMM[1].yyyy
 36: UCMP TEMP[17].x, TEMP[18].xxxx, CONST[0][6].xxxx, TEMP[17]
 37: USEQ TEMP[19].x, CONST[0][5].xxxx, IMM[1].zzzz
 38: UCMP TEMP[17].y, TEMP[19].xxxx, CONST[0][6].xxxx, TEMP[17]
 39: USEQ TEMP[20].x, CONST[0][5].xxxx, IMM[1].wwww
 40: UCMP TEMP[17].z, TEMP[20].xxxx, CONST[0][6].xxxx, TEMP[17]
 41: USEQ TEMP[21].x, CONST[0][5].xxxx, IMM[2].xxxx
 42: UCMP TEMP[17].w, TEMP[21].xxxx, CONST[0][6].xxxx, TEMP[17]
 43: UMUL TEMP[22].x, CONST[0][4].xxxx, IMM[1].xxxx
 44: MOV OUT[TEMP[22].x+2](1), TEMP[17]
 45: END
radeonsi: Compiling shader 1
TGSI shader LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"

; Buffer load returning <4 x float>. In the visible part of @wrapper it is
; called once, with a <4 x i32> loaded from element 0 of the last pointer
; argument, an i32 index (%9 + %5), i32 0, and two i1 false flags; the four
; result lanes feed the position multiply-adds.
; NOTE(review): operands are presumably (resource descriptor, vindex, offset,
; glc, slc) -- confirm against the AMDGPU intrinsic definitions.
; Function Attrs: nounwind readonly
declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #0

; Scalar load returning one float. Every visible call passes the same
; <4 x i32> (%26, built from the third pointer argument plus the constants
; 176 and 163756) and a constant i32 that is a multiple of 4 in 0..172.
; NOTE(review): the i32 looks like a byte offset into CONST[0] (e.g. 112 =
; 7 * 16 for CONST[0][7].x in the TGSI above) -- confirm.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1

; Declared here but not called in the visible part of @wrapper.
; NOTE(review): presumably the export intrinsic (target, enable mask, four
; values, done, vm) -- confirm against the AMDGPU intrinsic definitions.
; Function Attrs: nounwind
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #2

define amdgpu_vs void @wrapper([12 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [0 x float] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #3 {
main_body:
  %13 = ptrtoint [0 x float] addrspace(2)* %2 to i64
  %14 = bitcast i64 %13 to <2 x i32>
  %15 = add i32 %9, %5
  %16 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %4, i64 0, i64 0, !amdgpu.uniform !0
  %17 = load <4 x i32>, <4 x i32> addrspace(2)* %16, align 16, !invariant.load !0, !alias.scope !1, !noalias !4
  %18 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %17, i32 %15, i32 0, i1 false, i1 false) #1
  %19 = extractelement <4 x float> %18, i32 0
  %20 = extractelement <4 x float> %18, i32 1
  %21 = extractelement <4 x float> %18, i32 2
  %22 = extractelement <4 x float> %18, i32 3
  %23 = shufflevector <2 x i32> %14, <2 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %24 = shufflevector <2 x i32> %14, <2 x i32> undef, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>
  %25 = shufflevector <4 x i32> <i32 undef, i32 undef, i32 176, i32 163756>, <4 x i32> %23, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
  %26 = shufflevector <4 x i32> %25, <4 x i32> %24, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  %27 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 112)
  %28 = fmul nsz float %27, %19
  %29 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 116)
  %30 = fmul nsz float %29, %19
  %31 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 120)
  %32 = fmul nsz float %31, %19
  %33 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 124)
  %34 = fmul nsz float %33, %19
  %35 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 128)
  %36 = fmul nsz float %35, %20
  %37 = fadd nsz float %36, %28
  %38 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 132)
  %39 = fmul nsz float %38, %20
  %40 = fadd nsz float %39, %30
  %41 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 136)
  %42 = fmul nsz float %41, %20
  %43 = fadd nsz float %42, %32
  %44 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 140)
  %45 = fmul nsz float %44, %20
  %46 = fadd nsz float %45, %34
  %47 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 144)
  %48 = fmul nsz float %47, %21
  %49 = fadd nsz float %48, %37
  %50 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 148)
  %51 = fmul nsz float %50, %21
  %52 = fadd nsz float %51, %40
  %53 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 152)
  %54 = fmul nsz float %53, %21
  %55 = fadd nsz float %54, %43
  %56 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 156)
  %57 = fmul nsz float %56, %21
  %58 = fadd nsz float %57, %46
  %59 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 160)
  %60 = fmul nsz float %59, %22
  %61 = fadd nsz float %60, %49
  %62 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 164)
  %63 = fmul nsz float %62, %22
  %64 = fadd nsz float %63, %52
  %65 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 168)
  %66 = fmul nsz float %65, %22
  %67 = fadd nsz float %66, %55
  %68 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 172)
  %69 = fmul nsz float %68, %22
  %70 = fadd nsz float %69, %58
  %71 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 64)
  %72 = bitcast float %71 to i32
  %73 = shl i32 %72, 2
  %74 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 0)
  %75 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 4)
  %76 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 8)
  %77 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 12)
  %78 = or i32 %73, 1
  %array_vector12.i = insertelement <13 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %61, i32 0
  %79 = insertelement <13 x float> %array_vector12.i, float %74, i32 %78
  %80 = extractelement <13 x float> %79, i32 0
  %81 = extractelement <13 x float> %79, i32 1
  %82 = extractelement <13 x float> %79, i32 2
  %83 = extractelement <13 x float> %79, i32 3
  %84 = extractelement <13 x float> %79, i32 4
  %85 = extractelement <13 x float> %79, i32 5
  %86 = extractelement <13 x float> %79, i32 6
  %87 = extractelement <13 x float> %79, i32 7
  %88 = extractelement <13 x float> %79, i32 8
  %89 = extractelement <13 x float> %79, i32 9
  %90 = extractelement <13 x float> %79, i32 10
  %91 = extractelement <13 x float> %79, i32 11
  %92 = extractelement <13 x float> %79, i32 12
  %array_vector25.i = insertelement <13 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %64, i32 0
  %93 = insertelement <13 x float> %array_vector25.i, float %75, i32 %78
  %94 = extractelement <13 x float> %93, i32 0
  %95 = extractelement <13 x float> %93, i32 1
  %96 = extractelement <13 x float> %93, i32 2
  %97 = extractelement <13 x float> %93, i32 3
  %98 = extractelement <13 x float> %93, i32 4
  %99 = extractelement <13 x float> %93, i32 5
  %100 = extractelement <13 x float> %93, i32 6
  %101 = extractelement <13 x float> %93, i32 7
  %102 = extractelement <13 x float> %93, i32 8
  %103 = extractelement <13 x float> %93, i32 9
  %104 = extractelement <13 x float> %93, i32 10
  %105 = extractelement <13 x float> %93, i32 11
  %106 = extractelement <13 x float> %93, i32 12
  %array_vector38.i = insertelement <13 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %67, i32 0
  %107 = insertelement <13 x float> %array_vector38.i, float %76, i32 %78
  %108 = extractelement <13 x float> %107, i32 0
  %109 = extractelement <13 x float> %107, i32 1
  %110 = extractelement <13 x float> %107, i32 2
  %111 = extractelement <13 x float> %107, i32 3
  %112 = extractelement <13 x float> %107, i32 4
  %113 = extractelement <13 x float> %107, i32 5
  %114 = extractelement <13 x float> %107, i32 6
  %115 = extractelement <13 x float> %107, i32 7
  %116 = extractelement <13 x float> %107, i32 8
  %117 = extractelement <13 x float> %107, i32 9
  %118 = extractelement <13 x float> %107, i32 10
  %119 = extractelement <13 x float> %107, i32 11
  %120 = extractelement <13 x float> %107, i32 12
  %array_vector51.i = insertelement <13 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %70, i32 0
  %121 = insertelement <13 x float> %array_vector51.i, float %77, i32 %78
  %122 = extractelement <13 x float> %121, i32 0
  %123 = extractelement <13 x float> %121, i32 1
  %124 = extractelement <13 x float> %121, i32 2
  %125 = extractelement <13 x float> %121, i32 3
  %126 = extractelement <13 x float> %121, i32 4
  %127 = extractelement <13 x float> %121, i32 5
  %128 = extractelement <13 x float> %121, i32 6
  %129 = extractelement <13 x float> %121, i32 7
  %130 = extractelement <13 x float> %121, i32 8
  %131 = extractelement <13 x float> %121, i32 9
  %132 = extractelement <13 x float> %121, i32 10
  %133 = extractelement <13 x float> %121, i32 11
  %134 = extractelement <13 x float> %121, i32 12
  %135 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 16)
  %136 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 20)
  %137 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 24)
  %138 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 28)
  %139 = or i32 %73, 2
  %array_vector52.i = insertelement <13 x float> undef, float %80, i32 0
  %array_vector53.i = insertelement <13 x float> %array_vector52.i, float %81, i32 1
  %array_vector54.i = insertelement <13 x float> %array_vector53.i, float %82, i32 2
  %array_vector55.i = insertelement <13 x float> %array_vector54.i, float %83, i32 3
  %array_vector56.i = insertelement <13 x float> %array_vector55.i, float %84, i32 4
  %array_vector57.i = insertelement <13 x float> %array_vector56.i, float %85, i32 5
  %array_vector58.i = insertelement <13 x float> %array_vector57.i, float %86, i32 6
  %array_vector59.i = insertelement <13 x float> %array_vector58.i, float %87, i32 7
  %array_vector60.i = insertelement <13 x float> %array_vector59.i, float %88, i32 8
  %array_vector61.i = insertelement <13 x float> %array_vector60.i, float %89, i32 9
  %array_vector62.i = insertelement <13 x float> %array_vector61.i, float %90, i32 10
  %array_vector63.i = insertelement <13 x float> %array_vector62.i, float %91, i32 11
  %array_vector64.i = insertelement <13 x float> %array_vector63.i, float %92, i32 12
  %140 = insertelement <13 x float> %array_vector64.i, float %135, i32 %139
  %141 = extractelement <13 x float> %140, i32 0
  %142 = extractelement <13 x float> %140, i32 1
  %143 = extractelement <13 x float> %140, i32 2
  %144 = extractelement <13 x float> %140, i32 3
  %145 = extractelement <13 x float> %140, i32 4
  %146 = extractelement <13 x float> %140, i32 5
  %147 = extractelement <13 x float> %140, i32 6
  %148 = extractelement <13 x float> %140, i32 7
  %149 = extractelement <13 x float> %140, i32 8
  %150 = extractelement <13 x float> %140, i32 9
  %151 = extractelement <13 x float> %140, i32 10
  %152 = extractelement <13 x float> %140, i32 11
  %153 = extractelement <13 x float> %140, i32 12
  %array_vector65.i = insertelement <13 x float> undef, float %94, i32 0
  %array_vector66.i = insertelement <13 x float> %array_vector65.i, float %95, i32 1
  %array_vector67.i = insertelement <13 x float> %array_vector66.i, float %96, i32 2
  %array_vector68.i = insertelement <13 x float> %array_vector67.i, float %97, i32 3
  %array_vector69.i = insertelement <13 x float> %array_vector68.i, float %98, i32 4
  %array_vector70.i = insertelement <13 x float> %array_vector69.i, float %99, i32 5
  %array_vector71.i = insertelement <13 x float> %array_vector70.i, float %100, i32 6
  %array_vector72.i = insertelement <13 x float> %array_vector71.i, float %101, i32 7
  %array_vector73.i = insertelement <13 x float> %array_vector72.i, float %102, i32 8
  %array_vector74.i = insertelement <13 x float> %array_vector73.i, float %103, i32 9
  %array_vector75.i = insertelement <13 x float> %array_vector74.i, float %104, i32 10
  %array_vector76.i = insertelement <13 x float> %array_vector75.i, float %105, i32 11
  %array_vector77.i = insertelement <13 x float> %array_vector76.i, float %106, i32 12
  %154 = insertelement <13 x float> %array_vector77.i, float %136, i32 %139
  %155 = extractelement <13 x float> %154, i32 0
  %156 = extractelement <13 x float> %154, i32 1
  %157 = extractelement <13 x float> %154, i32 2
  %158 = extractelement <13 x float> %154, i32 3
  %159 = extractelement <13 x float> %154, i32 4
  %160 = extractelement <13 x float> %154, i32 5
  %161 = extractelement <13 x float> %154, i32 6
  %162 = extractelement <13 x float> %154, i32 7
  %163 = extractelement <13 x float> %154, i32 8
  %164 = extractelement <13 x float> %154, i32 9
  %165 = extractelement <13 x float> %154, i32 10
  %166 = extractelement <13 x float> %154, i32 11
  %167 = extractelement <13 x float> %154, i32 12
  %array_vector78.i = insertelement <13 x float> undef, float %108, i32 0
  %array_vector79.i = insertelement <13 x float> %array_vector78.i, float %109, i32 1
  %array_vector80.i = insertelement <13 x float> %array_vector79.i, float %110, i32 2
  %array_vector81.i = insertelement <13 x float> %array_vector80.i, float %111, i32 3
  %array_vector82.i = insertelement <13 x float> %array_vector81.i, float %112, i32 4
  %array_vector83.i = insertelement <13 x float> %array_vector82.i, float %113, i32 5
  %array_vector84.i = insertelement <13 x float> %array_vector83.i, float %114, i32 6
  %array_vector85.i = insertelement <13 x float> %array_vector84.i, float %115, i32 7
  %array_vector86.i = insertelement <13 x float> %array_vector85.i, float %116, i32 8
  %array_vector87.i = insertelement <13 x float> %array_vector86.i, float %117, i32 9
  %array_vector88.i = insertelement <13 x float> %array_vector87.i, float %118, i32 10
  %array_vector89.i = insertelement <13 x float> %array_vector88.i, float %119, i32 11
  %array_vector90.i = insertelement <13 x float> %array_vector89.i, float %120, i32 12
  %168 = insertelement <13 x float> %array_vector90.i, float %137, i32 %139
  %169 = extractelement <13 x float> %168, i32 0
  %170 = extractelement <13 x float> %168, i32 1
  %171 = extractelement <13 x float> %168, i32 2
  %172 = extractelement <13 x float> %168, i32 3
  %173 = extractelement <13 x float> %168, i32 4
  %174 = extractelement <13 x float> %168, i32 5
  %175 = extractelement <13 x float> %168, i32 6
  %176 = extractelement <13 x float> %168, i32 7
  %177 = extractelement <13 x float> %168, i32 8
  %178 = extractelement <13 x float> %168, i32 9
  %179 = extractelement <13 x float> %168, i32 10
  %180 = extractelement <13 x float> %168, i32 11
  %181 = extractelement <13 x float> %168, i32 12
  %array_vector91.i = insertelement <13 x float> undef, float %122, i32 0
  %array_vector92.i = insertelement <13 x float> %array_vector91.i, float %123, i32 1
  %array_vector93.i = insertelement <13 x float> %array_vector92.i, float %124, i32 2
  %array_vector94.i = insertelement <13 x float> %array_vector93.i, float %125, i32 3
  %array_vector95.i = insertelement <13 x float> %array_vector94.i, float %126, i32 4
  %array_vector96.i = insertelement <13 x float> %array_vector95.i, float %127, i32 5
  %array_vector97.i = insertelement <13 x float> %array_vector96.i, float %128, i32 6
  %array_vector98.i = insertelement <13 x float> %array_vector97.i, float %129, i32 7
  %array_vector99.i = insertelement <13 x float> %array_vector98.i, float %130, i32 8
  %array_vector100.i = insertelement <13 x float> %array_vector99.i, float %131, i32 9
  %array_vector101.i = insertelement <13 x float> %array_vector100.i, float %132, i32 10
  %array_vector102.i = insertelement <13 x float> %array_vector101.i, float %133, i32 11
  %array_vector103.i = insertelement <13 x float> %array_vector102.i, float %134, i32 12
  %182 = insertelement <13 x float> %array_vector103.i, float %138, i32 %139
  %183 = extractelement <13 x float> %182, i32 0
  %184 = extractelement <13 x float> %182, i32 1
  %185 = extractelement <13 x float> %182, i32 2
  %186 = extractelement <13 x float> %182, i32 3
  %187 = extractelement <13 x float> %182, i32 4
  %188 = extractelement <13 x float> %182, i32 5
  %189 = extractelement <13 x float> %182, i32 6
  %190 = extractelement <13 x float> %182, i32 7
  %191 = extractelement <13 x float> %182, i32 8
  %192 = extractelement <13 x float> %182, i32 9
  %193 = extractelement <13 x float> %182, i32 10
  %194 = extractelement <13 x float> %182, i32 11
  %195 = extractelement <13 x float> %182, i32 12
  %196 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 32)
  %197 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 36)
  %198 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 40)
  %199 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 44)
  %200 = or i32 %73, 3
  %array_vector104.i = insertelement <13 x float> undef, float %141, i32 0
  %array_vector105.i = insertelement <13 x float> %array_vector104.i, float %142, i32 1
  %array_vector106.i = insertelement <13 x float> %array_vector105.i, float %143, i32 2
  %array_vector107.i = insertelement <13 x float> %array_vector106.i, float %144, i32 3
  %array_vector108.i = insertelement <13 x float> %array_vector107.i, float %145, i32 4
  %array_vector109.i = insertelement <13 x float> %array_vector108.i, float %146, i32 5
  %array_vector110.i = insertelement <13 x float> %array_vector109.i, float %147, i32 6
  %array_vector111.i = insertelement <13 x float> %array_vector110.i, float %148, i32 7
  %array_vector112.i = insertelement <13 x float> %array_vector111.i, float %149, i32 8
  %array_vector113.i = insertelement <13 x float> %array_vector112.i, float %150, i32 9
  %array_vector114.i = insertelement <13 x float> %array_vector113.i, float %151, i32 10
  %array_vector115.i = insertelement <13 x float> %array_vector114.i, float %152, i32 11
  %array_vector116.i = insertelement <13 x float> %array_vector115.i, float %153, i32 12
  %201 = insertelement <13 x float> %array_vector116.i, float %196, i32 %200
  %202 = extractelement <13 x float> %201, i32 0
  %203 = extractelement <13 x float> %201, i32 1
  %204 = extractelement <13 x float> %201, i32 2
  %205 = extractelement <13 x float> %201, i32 3
  %206 = extractelement <13 x float> %201, i32 4
  %207 = extractelement <13 x float> %201, i32 5
  %208 = extractelement <13 x float> %201, i32 6
  %209 = extractelement <13 x float> %201, i32 7
  %210 = extractelement <13 x float> %201, i32 8
  %211 = extractelement <13 x float> %201, i32 9
  %212 = extractelement <13 x float> %201, i32 10
  %213 = extractelement <13 x float> %201, i32 11
  %214 = extractelement <13 x float> %201, i32 12
  %array_vector117.i = insertelement <13 x float> undef, float %155, i32 0
  %array_vector118.i = insertelement <13 x float> %array_vector117.i, float %156, i32 1
  %array_vector119.i = insertelement <13 x float> %array_vector118.i, float %157, i32 2
  %array_vector120.i = insertelement <13 x float> %array_vector119.i, float %158, i32 3
  %array_vector121.i = insertelement <13 x float> %array_vector120.i, float %159, i32 4
  %array_vector122.i = insertelement <13 x float> %array_vector121.i, float %160, i32 5
  %array_vector123.i = insertelement <13 x float> %array_vector122.i, float %161, i32 6
  %array_vector124.i = insertelement <13 x float> %array_vector123.i, float %162, i32 7
  %array_vector125.i = insertelement <13 x float> %array_vector124.i, float %163, i32 8
  %array_vector126.i = insertelement <13 x float> %array_vector125.i, float %164, i32 9
  %array_vector127.i = insertelement <13 x float> %array_vector126.i, float %165, i32 10
  %array_vector128.i = insertelement <13 x float> %array_vector127.i, float %166, i32 11
  %array_vector129.i = insertelement <13 x float> %array_vector128.i, float %167, i32 12
  %215 = insertelement <13 x float> %array_vector129.i, float %197, i32 %200
  %216 = extractelement <13 x float> %215, i32 0
  %217 = extractelement <13 x float> %215, i32 1
  %218 = extractelement <13 x float> %215, i32 2
  %219 = extractelement <13 x float> %215, i32 3
  %220 = extractelement <13 x float> %215, i32 4
  %221 = extractelement <13 x float> %215, i32 5
  %222 = extractelement <13 x float> %215, i32 6
  %223 = extractelement <13 x float> %215, i32 7
  %224 = extractelement <13 x float> %215, i32 8
  %225 = extractelement <13 x float> %215, i32 9
  %226 = extractelement <13 x float> %215, i32 10
  %227 = extractelement <13 x float> %215, i32 11
  %228 = extractelement <13 x float> %215, i32 12
  %array_vector130.i = insertelement <13 x float> undef, float %169, i32 0
  %array_vector131.i = insertelement <13 x float> %array_vector130.i, float %170, i32 1
  %array_vector132.i = insertelement <13 x float> %array_vector131.i, float %171, i32 2
  %array_vector133.i = insertelement <13 x float> %array_vector132.i, float %172, i32 3
  %array_vector134.i = insertelement <13 x float> %array_vector133.i, float %173, i32 4
  %array_vector135.i = insertelement <13 x float> %array_vector134.i, float %174, i32 5
  %array_vector136.i = insertelement <13 x float> %array_vector135.i, float %175, i32 6
  %array_vector137.i = insertelement <13 x float> %array_vector136.i, float %176, i32 7
  %array_vector138.i = insertelement <13 x float> %array_vector137.i, float %177, i32 8
  %array_vector139.i = insertelement <13 x float> %array_vector138.i, float %178, i32 9
  %array_vector140.i = insertelement <13 x float> %array_vector139.i, float %179, i32 10
  %array_vector141.i = insertelement <13 x float> %array_vector140.i, float %180, i32 11
  %array_vector142.i = insertelement <13 x float> %array_vector141.i, float %181, i32 12
  %229 = insertelement <13 x float> %array_vector142.i, float %198, i32 %200
  %230 = extractelement <13 x float> %229, i32 0
  %231 = extractelement <13 x float> %229, i32 1
  %232 = extractelement <13 x float> %229, i32 2
  %233 = extractelement <13 x float> %229, i32 3
  %234 = extractelement <13 x float> %229, i32 4
  %235 = extractelement <13 x float> %229, i32 5
  %236 = extractelement <13 x float> %229, i32 6
  %237 = extractelement <13 x float> %229, i32 7
  %238 = extractelement <13 x float> %229, i32 8
  %239 = extractelement <13 x float> %229, i32 9
  %240 = extractelement <13 x float> %229, i32 10
  %241 = extractelement <13 x float> %229, i32 11
  %242 = extractelement <13 x float> %229, i32 12
  %array_vector143.i = insertelement <13 x float> undef, float %183, i32 0
  %array_vector144.i = insertelement <13 x float> %array_vector143.i, float %184, i32 1
  %array_vector145.i = insertelement <13 x float> %array_vector144.i, float %185, i32 2
  %array_vector146.i = insertelement <13 x float> %array_vector145.i, float %186, i32 3
  %array_vector147.i = insertelement <13 x float> %array_vector146.i, float %187, i32 4
  %array_vector148.i = insertelement <13 x float> %array_vector147.i, float %188, i32 5
  %array_vector149.i = insertelement <13 x float> %array_vector148.i, float %189, i32 6
  %array_vector150.i = insertelement <13 x float> %array_vector149.i, float %190, i32 7
  %array_vector151.i = insertelement <13 x float> %array_vector150.i, float %191, i32 8
  %array_vector152.i = insertelement <13 x float> %array_vector151.i, float %192, i32 9
  %array_vector153.i = insertelement <13 x float> %array_vector152.i, float %193, i32 10
  %array_vector154.i = insertelement <13 x float> %array_vector153.i, float %194, i32 11
  %array_vector155.i = insertelement <13 x float> %array_vector154.i, float %195, i32 12
  %243 = insertelement <13 x float> %array_vector155.i, float %199, i32 %200
  %244 = extractelement <13 x float> %243, i32 0
  %245 = extractelement <13 x float> %243, i32 1
  %246 = extractelement <13 x float> %243, i32 2
  %247 = extractelement <13 x float> %243, i32 3
  %248 = extractelement <13 x float> %243, i32 4
  %249 = extractelement <13 x float> %243, i32 5
  %250 = extractelement <13 x float> %243, i32 6
  %251 = extractelement <13 x float> %243, i32 7
  %252 = extractelement <13 x float> %243, i32 8
  %253 = extractelement <13 x float> %243, i32 9
  %254 = extractelement <13 x float> %243, i32 10
  %255 = extractelement <13 x float> %243, i32 11
  %256 = extractelement <13 x float> %243, i32 12
  %257 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 48)
  %258 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 52)
  %259 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 56)
  %260 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 60)
  %261 = add i32 %73, 4
  %array_vector156.i = insertelement <13 x float> undef, float %202, i32 0
  %array_vector157.i = insertelement <13 x float> %array_vector156.i, float %203, i32 1
  %array_vector158.i = insertelement <13 x float> %array_vector157.i, float %204, i32 2
  %array_vector159.i = insertelement <13 x float> %array_vector158.i, float %205, i32 3
  %array_vector160.i = insertelement <13 x float> %array_vector159.i, float %206, i32 4
  %array_vector161.i = insertelement <13 x float> %array_vector160.i, float %207, i32 5
  %array_vector162.i = insertelement <13 x float> %array_vector161.i, float %208, i32 6
  %array_vector163.i = insertelement <13 x float> %array_vector162.i, float %209, i32 7
  %array_vector164.i = insertelement <13 x float> %array_vector163.i, float %210, i32 8
  %array_vector165.i = insertelement <13 x float> %array_vector164.i, float %211, i32 9
  %array_vector166.i = insertelement <13 x float> %array_vector165.i, float %212, i32 10
  %array_vector167.i = insertelement <13 x float> %array_vector166.i, float %213, i32 11
  %array_vector168.i = insertelement <13 x float> %array_vector167.i, float %214, i32 12
  %262 = insertelement <13 x float> %array_vector168.i, float %257, i32 %261
  %array_vector169.i = insertelement <13 x float> undef, float %216, i32 0
  %array_vector170.i = insertelement <13 x float> %array_vector169.i, float %217, i32 1
  %array_vector171.i = insertelement <13 x float> %array_vector170.i, float %218, i32 2
  %array_vector172.i = insertelement <13 x float> %array_vector171.i, float %219, i32 3
  %array_vector173.i = insertelement <13 x float> %array_vector172.i, float %220, i32 4
  %array_vector174.i = insertelement <13 x float> %array_vector173.i, float %221, i32 5
  %array_vector175.i = insertelement <13 x float> %array_vector174.i, float %222, i32 6
  %array_vector176.i = insertelement <13 x float> %array_vector175.i, float %223, i32 7
  %array_vector177.i = insertelement <13 x float> %array_vector176.i, float %224, i32 8
  %array_vector178.i = insertelement <13 x float> %array_vector177.i, float %225, i32 9
  %array_vector179.i = insertelement <13 x float> %array_vector178.i, float %226, i32 10
  %array_vector180.i = insertelement <13 x float> %array_vector179.i, float %227, i32 11
  %array_vector181.i = insertelement <13 x float> %array_vector180.i, float %228, i32 12
  %263 = insertelement <13 x float> %array_vector181.i, float %258, i32 %261
  %array_vector182.i = insertelement <13 x float> undef, float %230, i32 0
  %array_vector183.i = insertelement <13 x float> %array_vector182.i, float %231, i32 1
  %array_vector184.i = insertelement <13 x float> %array_vector183.i, float %232, i32 2
  %array_vector185.i = insertelement <13 x float> %array_vector184.i, float %233, i32 3
  %array_vector186.i = insertelement <13 x float> %array_vector185.i, float %234, i32 4
  %array_vector187.i = insertelement <13 x float> %array_vector186.i, float %235, i32 5
  %array_vector188.i = insertelement <13 x float> %array_vector187.i, float %236, i32 6
  %array_vector189.i = insertelement <13 x float> %array_vector188.i, float %237, i32 7
  %array_vector190.i = insertelement <13 x float> %array_vector189.i, float %238, i32 8
  %array_vector191.i = insertelement <13 x float> %array_vector190.i, float %239, i32 9
  %array_vector192.i = insertelement <13 x float> %array_vector191.i, float %240, i32 10
  %array_vector193.i = insertelement <13 x float> %array_vector192.i, float %241, i32 11
  %array_vector194.i = insertelement <13 x float> %array_vector193.i, float %242, i32 12
  %264 = insertelement <13 x float> %array_vector194.i, float %259, i32 %261
  %array_vector195.i = insertelement <13 x float> undef, float %244, i32 0
  %array_vector196.i = insertelement <13 x float> %array_vector195.i, float %245, i32 1
  %array_vector197.i = insertelement <13 x float> %array_vector196.i, float %246, i32 2
  %array_vector198.i = insertelement <13 x float> %array_vector197.i, float %247, i32 3
  %array_vector199.i = insertelement <13 x float> %array_vector198.i, float %248, i32 4
  %array_vector200.i = insertelement <13 x float> %array_vector199.i, float %249, i32 5
  %array_vector201.i = insertelement <13 x float> %array_vector200.i, float %250, i32 6
  %array_vector202.i = insertelement <13 x float> %array_vector201.i, float %251, i32 7
  %array_vector203.i = insertelement <13 x float> %array_vector202.i, float %252, i32 8
  %array_vector204.i = insertelement <13 x float> %array_vector203.i, float %253, i32 9
  %array_vector205.i = insertelement <13 x float> %array_vector204.i, float %254, i32 10
  %array_vector206.i = insertelement <13 x float> %array_vector205.i, float %255, i32 11
  %array_vector207.i = insertelement <13 x float> %array_vector206.i, float %256, i32 12
  %265 = insertelement <13 x float> %array_vector207.i, float %260, i32 %261
  %266 = extractelement <13 x float> %262, i32 %139
  %267 = extractelement <13 x float> %263, i32 %139
  %268 = extractelement <13 x float> %264, i32 %139
  %269 = extractelement <13 x float> %265, i32 %139
  %270 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 80)
  %271 = bitcast float %270 to i32
  %272 = icmp eq i32 %271, 0
  %273 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 96)
  %274 = select i1 %272, float %273, float %266
  %275 = icmp eq i32 %271, 1
  %276 = select i1 %275, float %273, float %267
  %277 = icmp eq i32 %271, 2
  %278 = select i1 %277, float %273, float %268
  %279 = icmp eq i32 %271, 3
  %280 = select i1 %279, float %273, float %269
  %281 = insertelement <13 x float> %262, float %274, i32 %139
  %282 = extractelement <13 x float> %281, i32 0
  %283 = extractelement <13 x float> %281, i32 1
  %284 = extractelement <13 x float> %281, i32 2
  %285 = extractelement <13 x float> %281, i32 3
  %286 = extractelement <13 x float> %281, i32 4
  %287 = extractelement <13 x float> %281, i32 5
  %288 = extractelement <13 x float> %281, i32 6
  %289 = extractelement <13 x float> %281, i32 7
  %290 = extractelement <13 x float> %281, i32 8
  %291 = extractelement <13 x float> %281, i32 9
  %292 = extractelement <13 x float> %281, i32 10
  %293 = extractelement <13 x float> %281, i32 11
  %294 = extractelement <13 x float> %281, i32 12
  %295 = insertelement <13 x float> %263, float %276, i32 %139
  %296 = extractelement <13 x float> %295, i32 0
  %297 = extractelement <13 x float> %295, i32 1
  %298 = extractelement <13 x float> %295, i32 2
  %299 = extractelement <13 x float> %295, i32 3
  %300 = extractelement <13 x float> %295, i32 4
  %301 = extractelement <13 x float> %295, i32 5
  %302 = extractelement <13 x float> %295, i32 6
  %303 = extractelement <13 x float> %295, i32 7
  %304 = extractelement <13 x float> %295, i32 8
  %305 = extractelement <13 x float> %295, i32 9
  %306 = extractelement <13 x float> %295, i32 10
  %307 = extractelement <13 x float> %295, i32 11
  %308 = extractelement <13 x float> %295, i32 12
  %309 = insertelement <13 x float> %264, float %278, i32 %139
  %310 = extractelement <13 x float> %309, i32 0
  %311 = extractelement <13 x float> %309, i32 1
  %312 = extractelement <13 x float> %309, i32 2
  %313 = extractelement <13 x float> %309, i32 3
  %314 = extractelement <13 x float> %309, i32 4
  %315 = extractelement <13 x float> %309, i32 5
  %316 = extractelement <13 x float> %309, i32 6
  %317 = extractelement <13 x float> %309, i32 7
  %318 = extractelement <13 x float> %309, i32 8
  %319 = extractelement <13 x float> %309, i32 9
  %320 = extractelement <13 x float> %309, i32 10
  %321 = extractelement <13 x float> %309, i32 11
  %322 = extractelement <13 x float> %309, i32 12
  %323 = insertelement <13 x float> %265, float %280, i32 %139
  %324 = extractelement <13 x float> %323, i32 0
  %325 = extractelement <13 x float> %323, i32 1
  %326 = extractelement <13 x float> %323, i32 2
  %327 = extractelement <13 x float> %323, i32 3
  %328 = extractelement <13 x float> %323, i32 4
  %329 = extractelement <13 x float> %323, i32 5
  %330 = extractelement <13 x float> %323, i32 6
  %331 = extractelement <13 x float> %323, i32 7
  %332 = extractelement <13 x float> %323, i32 8
  %333 = extractelement <13 x float> %323, i32 9
  %334 = extractelement <13 x float> %323, i32 10
  %335 = extractelement <13 x float> %323, i32 11
  %336 = extractelement <13 x float> %323, i32 12
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %282, float %296, float %310, float %324, i1 true, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %283, float %297, float %311, float %325, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %284, float %298, float %312, float %326, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 34, i32 15, float %285, float %299, float %313, float %327, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 35, i32 15, float %286, float %300, float %314, float %328, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 36, i32 15, float %287, float %301, float %315, float %329, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 37, i32 15, float %288, float %302, float %316, float %330, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 38, i32 15, float %289, float %303, float %317, float %331, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 39, i32 15, float %290, float %304, float %318, float %332, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 40, i32 15, float %291, float %305, float %319, float %333, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 41, i32 15, float %292, float %306, float %320, float %334, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 42, i32 15, float %293, float %307, float %321, float %335, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 43, i32 15, float %294, float %308, float %322, float %336, i1 false, i1 false) #2, !noalias !1
  ret void
}

; Attribute groups referenced by #N on declarations, definitions and call sites of this module.
; #0: never unwinds and only reads memory (performs no writes).
attributes #0 = { nounwind readonly }
; #1: never unwinds and does not access memory at all.
attributes #1 = { nounwind readnone }
; #2: never unwinds; this is the group attached to the llvm.amdgcn.exp.f32 export calls above.
attributes #2 = { nounwind }
; #3: string attribute "no-signed-zeros-fp-math"="true".
; NOTE(review): presumably attached to the shader's function definition, which is not visible next to this line - confirm.
attributes #3 = { "no-signed-zeros-fp-math"="true" }

; Numbered metadata nodes of this module.
; !0: empty metadata node.
!0 = !{}
; !1: list holding the single scope !2; used as the !noalias operand of the export calls above.
!1 = !{!2}
; !2: distinct, self-referencing scope named "main: argument 1" that belongs to domain !3.
!2 = distinct !{!2, !3, !"main: argument 1"}
; !3: distinct, self-referencing node named "main"; it is the domain shared by scopes !2 and !5.
!3 = distinct !{!3, !"main"}
; !4: list holding the single scope !5.
!4 = !{!5}
; !5: distinct, self-referencing scope named "main: argument 0", in the same domain !3.
!5 = distinct !{!5, !3, !"main: argument 0"}

SHADER KEY
  part.vs.prolog.instance_divisor_is_one = 0
  part.vs.prolog.instance_divisor_is_fetched = 0
  part.vs.prolog.ls_vgpr_fix = 0
  mono.vs.fix_fetch = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
  as_es = 0
  as_ls = 0
  mono.u.vs_export_prim_id = 0
  opt.kill_outputs = 0x0
  opt.clip_disable = 0

Vertex Shader as VS:
Shader main disassembly:
	; Register usage, as read from the instructions below:
	;   s[0:3]    descriptor loaded from s[8:9], used by the vertex fetch; later reused for the constants at 0x20
	;   s[4:7]    buffer descriptor for every s_buffer_load_*; overwritten by the last load (constants at 0x30)
	;   s32       dword at 0x40, shifted left by 2 and copied to m0 as the relative register index
	;   s33, s34  dwords at 0x50 / 0x60: the value compared against 0..3 and the value selected on a match
	;   s[8:23]   sixteen constants (0x70..0xaf) multiplied with the fetched attribute v[13:16]
	;   v16.., v32.., v48.., v0..  first, second, third and fourth export operand of pos0 / param0..param11
	; v_movreld_b32 vN, vS stores vS into v[N + m0]; v_movrels_b32 vD, vN loads v[N + m0] into vD.

	; Load a 4-dword descriptor through the pointer in s[8:9]; it is the resource of the vertex fetch below.
	s_load_dwordx4 s[0:3], s[8:9], 0x0                    ; C00A0004 00000000
	; Fetch index = s10 + v0.
	v_add_i32_e32 v0, vcc, s10, v0                        ; 3200000A
	; Fill words 2 and 3 of the descriptor s[4:7] (0xb0 = 176, 0x27fac = 163756).
	; s[4:5] is not written before the loads that use it - presumably set up before entry.
	s_mov_b32 s7, 0x27fac                                 ; BE8700FF 00027FAC
	s_movk_i32 s6, 0xb0                                   ; B00600B0
	s_buffer_load_dword s32, s[4:7], 0x40                 ; C0220802 00000040
	; Wait for the outstanding scalar loads so that s[0:3] is valid for the fetch.
	s_waitcnt lgkmcnt(0)                                  ; BF8C007F
	buffer_load_format_xyzw v[13:16], v0, s[0:3], 0 idxen ; E00C2000 80000D00
	s_buffer_load_dword s33, s[4:7], 0x50                 ; C0220842 00000050
	s_buffer_load_dword s34, s[4:7], 0x60                 ; C0220882 00000060
	; Four constant vectors at 0x70..0xa0 -> s[8:23] (factors of the v_mul/v_mac chain below).
	s_buffer_load_dwordx4 s[8:11], s[4:7], 0x70           ; C02A0202 00000070
	s_buffer_load_dwordx4 s[12:15], s[4:7], 0x80          ; C02A0302 00000080
	s_buffer_load_dwordx4 s[16:19], s[4:7], 0x90          ; C02A0402 00000090
	s_buffer_load_dwordx4 s[20:23], s[4:7], 0xa0          ; C02A0502 000000A0
	; Four constant vectors at 0x00..0x30 -> s[24:31], s[0:3], s[4:7] (stored with relative indexing below).
	s_buffer_load_dwordx4 s[24:27], s[4:7], 0x0           ; C02A0602 00000000
	s_buffer_load_dwordx4 s[28:31], s[4:7], 0x10          ; C02A0702 00000010
	; v1..v12 = 0 (interleaved with the remaining loads).
	v_mov_b32_e32 v1, 0                                   ; 7E020280
	s_buffer_load_dwordx4 s[0:3], s[4:7], 0x20            ; C02A0002 00000020
	v_mov_b32_e32 v2, v1                                  ; 7E040301
	; Last use of the descriptor: the load overwrites s[4:7] itself.
	s_buffer_load_dwordx4 s[4:7], s[4:7], 0x30            ; C02A0102 00000030
	v_mov_b32_e32 v10, v1                                 ; 7E140301
	v_mov_b32_e32 v3, v1                                  ; 7E060301
	v_mov_b32_e32 v4, v1                                  ; 7E080301
	v_mov_b32_e32 v5, v1                                  ; 7E0A0301
	v_mov_b32_e32 v6, v1                                  ; 7E0C0301
	v_mov_b32_e32 v7, v1                                  ; 7E0E0301
	v_mov_b32_e32 v8, v1                                  ; 7E100301
	v_mov_b32_e32 v9, v1                                  ; 7E120301
	v_mov_b32_e32 v11, v1                                 ; 7E160301
	v_mov_b32_e32 v12, v1                                 ; 7E180301
	; Relative index = (dword at 0x40) * 4.
	s_lshl_b32 s32, s32, 2                                ; 8E208220
	s_waitcnt lgkmcnt(0)                                  ; BF8C007F
	; Copy the constants from SGPRs into v64..v76 and v79; they are the sources of the v_movreld_b32 stores below.
	v_mov_b32_e32 v64, s24                                ; 7E800218
	s_mov_b32 m0, s32                                     ; BEFC0020
	v_mov_b32_e32 v65, s25                                ; 7E820219
	v_mov_b32_e32 v68, s28                                ; 7E88021C
	v_mov_b32_e32 v66, s26                                ; 7E84021A
	v_mov_b32_e32 v69, s29                                ; 7E8A021D
	v_mov_b32_e32 v72, s0                                 ; 7E900200
	v_mov_b32_e32 v67, s27                                ; 7E86021B
	v_mov_b32_e32 v70, s30                                ; 7E8C021E
	v_mov_b32_e32 v73, s1                                 ; 7E920201
	v_mov_b32_e32 v76, s4                                 ; 7E980204
	v_mov_b32_e32 v71, s31                                ; 7E8E021F
	v_mov_b32_e32 v74, s2                                 ; 7E940202
	v_mov_b32_e32 v79, s5                                 ; 7E9E0205
	v_mov_b32_e32 v75, s3                                 ; 7E960203
	; vcc = (s33 == 0); consumed by the first v_cndmask_b32 below.
	v_cmp_eq_u32_e64 vcc, s33, 0                          ; D0CA006A 00010021
	; Wait for the vertex fetch into v[13:16].
	s_waitcnt vmcnt(0)                                    ; BF8C0F70
	; v0, v77, v78, v13 = s[8:11]*v13 + s[12:15]*v14 + s[16:19]*v15 + s[20:23]*v16 (one result per register).
	v_mul_f32_e32 v0, s8, v13                             ; 0A001A08
	v_mul_f32_e32 v77, s9, v13                            ; 0A9A1A09
	v_mul_f32_e32 v78, s10, v13                           ; 0A9C1A0A
	v_mul_f32_e32 v13, s11, v13                           ; 0A1A1A0B
	v_mac_f32_e32 v0, s12, v14                            ; 2C001C0C
	v_mac_f32_e32 v13, s15, v14                           ; 2C1A1C0F
	v_mac_f32_e32 v77, s13, v14                           ; 2C9A1C0D
	v_mac_f32_e32 v78, s14, v14                           ; 2C9C1C0E
	v_mac_f32_e32 v0, s16, v15                            ; 2C001E10
	v_mac_f32_e32 v13, s19, v15                           ; 2C1A1E13
	v_mac_f32_e32 v77, s17, v15                           ; 2C9A1E11
	v_mac_f32_e32 v78, s18, v15                           ; 2C9C1E12
	v_mac_f32_e32 v0, s20, v16                            ; 2C002014
	v_mac_f32_e32 v13, s23, v16                           ; 2C1A2017
	v_mov_b32_e32 v47, v15                                ; 7E5E030F
	v_mac_f32_e32 v77, s21, v16                           ; 2C9A2015
	v_mac_f32_e32 v78, s22, v16                           ; 2C9C2016
	v_mov_b32_e32 v31, v15                                ; 7E3E030F
	v_mov_b32_e32 v63, v15                                ; 7E7E030F
	v_mov_b32_e32 v32, v0                                 ; 7E400300
	; First export operand set: v16 = v0 (first result), v17..v28 = v1..v12 (zeros).
	v_mov_b32_e32 v30, v14                                ; 7E3C030E
	v_mov_b32_e32 v29, v13                                ; 7E3A030D
	v_mov_b32_e32 v28, v12                                ; 7E38030C
	v_mov_b32_e32 v27, v11                                ; 7E36030B
	v_mov_b32_e32 v26, v10                                ; 7E34030A
	v_mov_b32_e32 v25, v9                                 ; 7E320309
	v_mov_b32_e32 v24, v8                                 ; 7E300308
	v_mov_b32_e32 v23, v7                                 ; 7E2E0307
	v_mov_b32_e32 v22, v6                                 ; 7E2C0306
	v_mov_b32_e32 v21, v5                                 ; 7E2A0305
	v_mov_b32_e32 v20, v4                                 ; 7E280304
	v_mov_b32_e32 v19, v3                                 ; 7E260303
	v_mov_b32_e32 v18, v2                                 ; 7E240302
	v_mov_b32_e32 v17, v1                                 ; 7E220301
	v_mov_b32_e32 v16, v0                                 ; 7E200300
	; v[17 + m0] = v64 (constant dword 0 of the vector at 0x00).
	v_movreld_b32_e32 v17, v64                            ; 7E226D40
	v_mov_b32_e32 v48, v0                                 ; 7E600300
	; Second export operand set: v32 = v77 (second result), v33..v44 = zeros.
	v_mov_b32_e32 v32, v77                                ; 7E40034D
	v_mov_b32_e32 v46, v14                                ; 7E5C030E
	v_mov_b32_e32 v45, v13                                ; 7E5A030D
	v_mov_b32_e32 v44, v12                                ; 7E58030C
	v_mov_b32_e32 v43, v11                                ; 7E56030B
	v_mov_b32_e32 v42, v10                                ; 7E54030A
	v_mov_b32_e32 v41, v9                                 ; 7E520309
	v_mov_b32_e32 v40, v8                                 ; 7E500308
	v_mov_b32_e32 v39, v7                                 ; 7E4E0307
	v_mov_b32_e32 v38, v6                                 ; 7E4C0306
	v_mov_b32_e32 v37, v5                                 ; 7E4A0305
	v_mov_b32_e32 v36, v4                                 ; 7E480304
	v_mov_b32_e32 v35, v3                                 ; 7E460303
	v_mov_b32_e32 v34, v2                                 ; 7E440302
	v_mov_b32_e32 v33, v1                                 ; 7E420301
	v_movreld_b32_e32 v33, v65                            ; 7E426D41
	v_movreld_b32_e32 v18, v68                            ; 7E246D44
	; Third export operand set: v48 = v78 (third result), v49..v60 = zeros.
	v_mov_b32_e32 v48, v78                                ; 7E60034E
	v_mov_b32_e32 v62, v14                                ; 7E7C030E
	v_mov_b32_e32 v61, v13                                ; 7E7A030D
	v_mov_b32_e32 v60, v12                                ; 7E78030C
	v_mov_b32_e32 v59, v11                                ; 7E76030B
	v_mov_b32_e32 v58, v10                                ; 7E74030A
	v_mov_b32_e32 v57, v9                                 ; 7E720309
	v_mov_b32_e32 v56, v8                                 ; 7E700308
	v_mov_b32_e32 v55, v7                                 ; 7E6E0307
	v_mov_b32_e32 v54, v6                                 ; 7E6C0306
	v_mov_b32_e32 v53, v5                                 ; 7E6A0305
	v_mov_b32_e32 v52, v4                                 ; 7E680304
	v_mov_b32_e32 v51, v3                                 ; 7E660303
	v_mov_b32_e32 v50, v2                                 ; 7E640302
	v_mov_b32_e32 v49, v1                                 ; 7E620301
	; Fourth export operand set lives in place: v0 = v13 (fourth result), v1..v12 are still zero.
	v_mov_b32_e32 v0, v13                                 ; 7E00030D
	; Remaining relative stores of the constants from 0x00..0x3f into the four operand sets.
	v_movreld_b32_e32 v49, v66                            ; 7E626D42
	v_movreld_b32_e32 v34, v69                            ; 7E446D45
	v_movreld_b32_e32 v19, v72                            ; 7E266D48
	v_movreld_b32_e32 v1, v67                             ; 7E026D43
	v_movreld_b32_e32 v50, v70                            ; 7E646D46
	v_movreld_b32_e32 v35, v73                            ; 7E466D49
	v_movreld_b32_e32 v20, v76                            ; 7E286D4C
	v_movreld_b32_e32 v2, v71                             ; 7E046D47
	v_movreld_b32_e32 v36, v79                            ; 7E486D4F
	; v77 and v78 are free after the copies above and are reused for s6 and s7 (dwords at 0x38 / 0x3c).
	v_mov_b32_e32 v77, s6                                 ; 7E9A0206
	v_movreld_b32_e32 v51, v74                            ; 7E666D4A
	; v64 = s34 (dword at 0x60), the replacement value of the selects below.
	v_mov_b32_e32 v64, s34                                ; 7E800222
	; Read back v[18 + m0], then v65 = vcc ? v64 : v65 (vcc = s33 == 0).
	v_movrels_b32_e32 v65, v18                            ; 7E826F12
	v_movreld_b32_e32 v52, v77                            ; 7E686D4D
	v_cndmask_b32_e32 v65, v65, v64, vcc                  ; 00828141
	v_mov_b32_e32 v78, s7                                 ; 7E9C0207
	v_movreld_b32_e32 v3, v75                             ; 7E066D4B
	; Same read / compare / select for v[34 + m0] (s33 == 1), v[50 + m0] (s33 == 2) and v[2 + m0] (s33 == 3).
	v_movrels_b32_e32 v66, v34                            ; 7E846F22
	v_cmp_eq_u32_e64 vcc, s33, 1                          ; D0CA006A 00010221
	v_movreld_b32_e32 v4, v78                             ; 7E086D4E
	v_cndmask_b32_e32 v66, v66, v64, vcc                  ; 00848142
	v_movrels_b32_e32 v67, v50                            ; 7E866F32
	v_cmp_eq_u32_e64 vcc, s33, 2                          ; D0CA006A 00010421
	v_cndmask_b32_e32 v67, v67, v64, vcc                  ; 00868143
	v_movrels_b32_e32 v68, v2                             ; 7E886F02
	v_cmp_eq_u32_e64 vcc, s33, 3                          ; D0CA006A 00010621
	v_cndmask_b32_e32 v64, v68, v64, vcc                  ; 00808144
	; Store the four selected values back to the relative slots they were read from.
	v_movreld_b32_e32 v18, v65                            ; 7E246D41
	v_movreld_b32_e32 v34, v66                            ; 7E446D42
	v_movreld_b32_e32 v50, v67                            ; 7E646D43
	v_movreld_b32_e32 v2, v64                             ; 7E046D40
	; Exports: pos0 carries the four v_mul/v_mac results, param0..param11 the twelve indexed slots.
	exp pos0 v16, v32, v48, v0 done                       ; C40008CF 00302010
	exp param0 v17, v33, v49, v1                          ; C400020F 01312111
	exp param1 v18, v34, v50, v2                          ; C400021F 02322212
	exp param2 v19, v35, v51, v3                          ; C400022F 03332313
	exp param3 v20, v36, v52, v4                          ; C400023F 04342414
	exp param4 v21, v37, v53, v5                          ; C400024F 05352515
	exp param5 v22, v38, v54, v6                          ; C400025F 06362616
	exp param6 v23, v39, v55, v7                          ; C400026F 07372717
	exp param7 v24, v40, v56, v8                          ; C400027F 08382818
	exp param8 v25, v41, v57, v9                          ; C400028F 09392919
	exp param9 v26, v42, v58, v10                         ; C400029F 0A3A2A1A
	exp param10 v27, v43, v59, v11                        ; C40002AF 0B3B2B1B
	exp param11 v28, v44, v60, v12                        ; C40002BF 0C3C2C1C
	s_endpgm                                              ; BF810000

*** SHADER STATS ***
SGPRS: 96
VGPRS: 80
Spilled SGPRs: 0
Spilled VGPRs: 0
Private memory VGPRs: 0
Code Size: 776 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 3
********************
-------------- next part --------------
VERT
PROPERTY NEXT_SHADER FRAG
DCL IN[0]
DCL OUT[0], POSITION
DCL OUT[1..12], ARRAY(1), GENERIC[0]
DCL CONST[0][0..10]
DCL TEMP[0..2], LOCAL
DCL TEMP[3..6], ARRAY(1), LOCAL
DCL TEMP[7..10], ARRAY(2), LOCAL
DCL TEMP[11..14], ARRAY(3), LOCAL
DCL TEMP[15..22], LOCAL
IMM[0] FLT32 {    0.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {4, 0, 1, 2}
IMM[2] INT32 {3, 0, 0, 0}
  0: MUL TEMP[0], CONST[0][7], IN[0].xxxx
  1: MAD TEMP[1], CONST[0][8], IN[0].yyyy, TEMP[0]
  2: MAD TEMP[2], CONST[0][9], IN[0].zzzz, TEMP[1]
  3: MAD OUT[0], CONST[0][10], IN[0].wwww, TEMP[2]
  4: MOV TEMP[3], IMM[0].xxxx
  5: MOV TEMP[4], IMM[0].xxxx
  6: MOV TEMP[5], IMM[0].xxxx
  7: MOV TEMP[6], IMM[0].xxxx
  8: MOV OUT[1], TEMP[3]
  9: MOV OUT[2], TEMP[4]
 10: MOV OUT[3], TEMP[5]
 11: MOV OUT[4], TEMP[6]
 12: MOV TEMP[7], IMM[0].xxxx
 13: MOV TEMP[8], IMM[0].xxxx
 14: MOV TEMP[9], IMM[0].xxxx
 15: MOV TEMP[10], IMM[0].xxxx
 16: MOV OUT[5], TEMP[7]
 17: MOV OUT[6], TEMP[8]
 18: MOV OUT[7], TEMP[9]
 19: MOV OUT[8], TEMP[10]
 20: MOV TEMP[11], IMM[0].xxxx
 21: MOV TEMP[12], IMM[0].xxxx
 22: MOV TEMP[13], IMM[0].xxxx
 23: MOV TEMP[14], IMM[0].xxxx
 24: MOV OUT[9], TEMP[11]
 25: MOV OUT[10], TEMP[12]
 26: MOV OUT[11], TEMP[13]
 27: MOV OUT[12], TEMP[14]
 28: UMUL TEMP[15].x, CONST[0][4].xxxx, IMM[1].xxxx
 29: MOV OUT[TEMP[15].x+1](1), CONST[0][0]
 30: MOV OUT[TEMP[15].x+2](1), CONST[0][1]
 31: MOV OUT[TEMP[15].x+3](1), CONST[0][2]
 32: MOV OUT[TEMP[15].x+4](1), CONST[0][3]
 33: UMUL TEMP[16].x, CONST[0][4].xxxx, IMM[1].xxxx
 34: MOV TEMP[17], OUT[TEMP[16].x+2](1)
 35: USEQ TEMP[18].x, CONST[0][5].xxxx, IMM[1].yyyy
 36: UCMP TEMP[17].x, TEMP[18].xxxx, CONST[0][6].xxxx, TEMP[17]
 37: USEQ TEMP[19].x, CONST[0][5].xxxx, IMM[1].zzzz
 38: UCMP TEMP[17].y, TEMP[19].xxxx, CONST[0][6].xxxx, TEMP[17]
 39: USEQ TEMP[20].x, CONST[0][5].xxxx, IMM[1].wwww
 40: UCMP TEMP[17].z, TEMP[20].xxxx, CONST[0][6].xxxx, TEMP[17]
 41: USEQ TEMP[21].x, CONST[0][5].xxxx, IMM[2].xxxx
 42: UCMP TEMP[17].w, TEMP[21].xxxx, CONST[0][6].xxxx, TEMP[17]
 43: UMUL TEMP[22].x, CONST[0][4].xxxx, IMM[1].xxxx
 44: MOV OUT[TEMP[22].x+2](1), TEMP[17]
 45: END
radeonsi: Compiling shader 1
TGSI shader LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
; Data layout: little-endian ("e"); pointers are 32 bits wide in the default address space and in
; address spaces 3 and 5, and 64 bits wide in address spaces 1, 2 and 4; native integer widths are 32 and 64.
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
; Target triple: amdgcn architecture, vendor and OS fields left empty.
target triple = "amdgcn--"

; Formatted buffer load returning four floats. @wrapper calls it once with a <4 x i32> descriptor
; loaded from its fifth argument, an i32 index, the constant 0 and two false i1 flags.
; Function Attrs: nounwind readonly
declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #0

; Returns one float read through a <4 x i32> descriptor at an i32 offset; every call in @wrapper
; passes a constant offset (112, 116, ...). NOTE(review): presumably a byte offset - confirm.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1

; Export intrinsic taking two i32 operands, four float values and two i1 flags.
; NOTE(review): the operands are presumably target, channel-enable mask, "done" and "vm" - confirm
; against the LLVM AMDGPU intrinsic definitions.
; Function Attrs: nounwind
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #2

define amdgpu_vs void @wrapper([12 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [0 x float] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #3 {
main_body:
  %13 = ptrtoint [0 x float] addrspace(2)* %2 to i64
  %14 = bitcast i64 %13 to <2 x i32>
  %15 = add i32 %9, %5
  %16 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %4, i64 0, i64 0, !amdgpu.uniform !0
  %17 = load <4 x i32>, <4 x i32> addrspace(2)* %16, align 16, !invariant.load !0, !alias.scope !1, !noalias !4
  %18 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %17, i32 %15, i32 0, i1 false, i1 false) #1
  %19 = extractelement <4 x float> %18, i32 0
  %20 = extractelement <4 x float> %18, i32 1
  %21 = extractelement <4 x float> %18, i32 2
  %22 = extractelement <4 x float> %18, i32 3
  %23 = shufflevector <2 x i32> %14, <2 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %24 = shufflevector <2 x i32> %14, <2 x i32> undef, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>
  %25 = shufflevector <4 x i32> <i32 undef, i32 undef, i32 176, i32 163756>, <4 x i32> %23, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
  %26 = shufflevector <4 x i32> %25, <4 x i32> %24, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  %27 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 112)
  %28 = fmul nsz float %27, %19
  %29 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 116)
  %30 = fmul nsz float %29, %19
  %31 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 120)
  %32 = fmul nsz float %31, %19
  %33 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 124)
  %34 = fmul nsz float %33, %19
  %35 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 128)
  %36 = fmul nsz float %35, %20
  %37 = fadd nsz float %36, %28
  %38 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 132)
  %39 = fmul nsz float %38, %20
  %40 = fadd nsz float %39, %30
  %41 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 136)
  %42 = fmul nsz float %41, %20
  %43 = fadd nsz float %42, %32
  %44 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 140)
  %45 = fmul nsz float %44, %20
  %46 = fadd nsz float %45, %34
  %47 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 144)
  %48 = fmul nsz float %47, %21
  %49 = fadd nsz float %48, %37
  %50 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 148)
  %51 = fmul nsz float %50, %21
  %52 = fadd nsz float %51, %40
  %53 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 152)
  %54 = fmul nsz float %53, %21
  %55 = fadd nsz float %54, %43
  %56 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 156)
  %57 = fmul nsz float %56, %21
  %58 = fadd nsz float %57, %46
  %59 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 160)
  %60 = fmul nsz float %59, %22
  %61 = fadd nsz float %60, %49
  %62 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 164)
  %63 = fmul nsz float %62, %22
  %64 = fadd nsz float %63, %52
  %65 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 168)
  %66 = fmul nsz float %65, %22
  %67 = fadd nsz float %66, %55
  %68 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 172)
  %69 = fmul nsz float %68, %22
  %70 = fadd nsz float %69, %58
  %71 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 64)
  %72 = bitcast float %71 to i32
  %73 = shl i32 %72, 2
  %74 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 0)
  %75 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 4)
  %76 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 8)
  %77 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 12)
  %78 = or i32 %73, 1
  %array_vector12.i = insertelement <13 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %61, i32 0
  %79 = insertelement <13 x float> %array_vector12.i, float %74, i32 %78
  %80 = extractelement <13 x float> %79, i32 0
  %81 = extractelement <13 x float> %79, i32 1
  %82 = extractelement <13 x float> %79, i32 2
  %83 = extractelement <13 x float> %79, i32 3
  %84 = extractelement <13 x float> %79, i32 4
  %85 = extractelement <13 x float> %79, i32 5
  %86 = extractelement <13 x float> %79, i32 6
  %87 = extractelement <13 x float> %79, i32 7
  %88 = extractelement <13 x float> %79, i32 8
  %89 = extractelement <13 x float> %79, i32 9
  %90 = extractelement <13 x float> %79, i32 10
  %91 = extractelement <13 x float> %79, i32 11
  %92 = extractelement <13 x float> %79, i32 12
  %array_vector25.i = insertelement <13 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %64, i32 0
  %93 = insertelement <13 x float> %array_vector25.i, float %75, i32 %78
  %94 = extractelement <13 x float> %93, i32 0
  %95 = extractelement <13 x float> %93, i32 1
  %96 = extractelement <13 x float> %93, i32 2
  %97 = extractelement <13 x float> %93, i32 3
  %98 = extractelement <13 x float> %93, i32 4
  %99 = extractelement <13 x float> %93, i32 5
  %100 = extractelement <13 x float> %93, i32 6
  %101 = extractelement <13 x float> %93, i32 7
  %102 = extractelement <13 x float> %93, i32 8
  %103 = extractelement <13 x float> %93, i32 9
  %104 = extractelement <13 x float> %93, i32 10
  %105 = extractelement <13 x float> %93, i32 11
  %106 = extractelement <13 x float> %93, i32 12
  %array_vector38.i = insertelement <13 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %67, i32 0
  %107 = insertelement <13 x float> %array_vector38.i, float %76, i32 %78
  %108 = extractelement <13 x float> %107, i32 0
  %109 = extractelement <13 x float> %107, i32 1
  %110 = extractelement <13 x float> %107, i32 2
  %111 = extractelement <13 x float> %107, i32 3
  %112 = extractelement <13 x float> %107, i32 4
  %113 = extractelement <13 x float> %107, i32 5
  %114 = extractelement <13 x float> %107, i32 6
  %115 = extractelement <13 x float> %107, i32 7
  %116 = extractelement <13 x float> %107, i32 8
  %117 = extractelement <13 x float> %107, i32 9
  %118 = extractelement <13 x float> %107, i32 10
  %119 = extractelement <13 x float> %107, i32 11
  %120 = extractelement <13 x float> %107, i32 12
  %array_vector51.i = insertelement <13 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %70, i32 0
  %121 = insertelement <13 x float> %array_vector51.i, float %77, i32 %78
  %122 = extractelement <13 x float> %121, i32 0
  %123 = extractelement <13 x float> %121, i32 1
  %124 = extractelement <13 x float> %121, i32 2
  %125 = extractelement <13 x float> %121, i32 3
  %126 = extractelement <13 x float> %121, i32 4
  %127 = extractelement <13 x float> %121, i32 5
  %128 = extractelement <13 x float> %121, i32 6
  %129 = extractelement <13 x float> %121, i32 7
  %130 = extractelement <13 x float> %121, i32 8
  %131 = extractelement <13 x float> %121, i32 9
  %132 = extractelement <13 x float> %121, i32 10
  %133 = extractelement <13 x float> %121, i32 11
  %134 = extractelement <13 x float> %121, i32 12
  %135 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 16)
  %136 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 20)
  %137 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 24)
  %138 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 28)
  %139 = or i32 %73, 2
  %array_vector52.i = insertelement <13 x float> undef, float %80, i32 0
  %array_vector53.i = insertelement <13 x float> %array_vector52.i, float %81, i32 1
  %array_vector54.i = insertelement <13 x float> %array_vector53.i, float %82, i32 2
  %array_vector55.i = insertelement <13 x float> %array_vector54.i, float %83, i32 3
  %array_vector56.i = insertelement <13 x float> %array_vector55.i, float %84, i32 4
  %array_vector57.i = insertelement <13 x float> %array_vector56.i, float %85, i32 5
  %array_vector58.i = insertelement <13 x float> %array_vector57.i, float %86, i32 6
  %array_vector59.i = insertelement <13 x float> %array_vector58.i, float %87, i32 7
  %array_vector60.i = insertelement <13 x float> %array_vector59.i, float %88, i32 8
  %array_vector61.i = insertelement <13 x float> %array_vector60.i, float %89, i32 9
  %array_vector62.i = insertelement <13 x float> %array_vector61.i, float %90, i32 10
  %array_vector63.i = insertelement <13 x float> %array_vector62.i, float %91, i32 11
  %array_vector64.i = insertelement <13 x float> %array_vector63.i, float %92, i32 12
  %140 = insertelement <13 x float> %array_vector64.i, float %135, i32 %139
  %141 = extractelement <13 x float> %140, i32 0
  %142 = extractelement <13 x float> %140, i32 1
  %143 = extractelement <13 x float> %140, i32 2
  %144 = extractelement <13 x float> %140, i32 3
  %145 = extractelement <13 x float> %140, i32 4
  %146 = extractelement <13 x float> %140, i32 5
  %147 = extractelement <13 x float> %140, i32 6
  %148 = extractelement <13 x float> %140, i32 7
  %149 = extractelement <13 x float> %140, i32 8
  %150 = extractelement <13 x float> %140, i32 9
  %151 = extractelement <13 x float> %140, i32 10
  %152 = extractelement <13 x float> %140, i32 11
  %153 = extractelement <13 x float> %140, i32 12
  %array_vector65.i = insertelement <13 x float> undef, float %94, i32 0
  %array_vector66.i = insertelement <13 x float> %array_vector65.i, float %95, i32 1
  %array_vector67.i = insertelement <13 x float> %array_vector66.i, float %96, i32 2
  %array_vector68.i = insertelement <13 x float> %array_vector67.i, float %97, i32 3
  %array_vector69.i = insertelement <13 x float> %array_vector68.i, float %98, i32 4
  %array_vector70.i = insertelement <13 x float> %array_vector69.i, float %99, i32 5
  %array_vector71.i = insertelement <13 x float> %array_vector70.i, float %100, i32 6
  %array_vector72.i = insertelement <13 x float> %array_vector71.i, float %101, i32 7
  %array_vector73.i = insertelement <13 x float> %array_vector72.i, float %102, i32 8
  %array_vector74.i = insertelement <13 x float> %array_vector73.i, float %103, i32 9
  %array_vector75.i = insertelement <13 x float> %array_vector74.i, float %104, i32 10
  %array_vector76.i = insertelement <13 x float> %array_vector75.i, float %105, i32 11
  %array_vector77.i = insertelement <13 x float> %array_vector76.i, float %106, i32 12
  %154 = insertelement <13 x float> %array_vector77.i, float %136, i32 %139
  %155 = extractelement <13 x float> %154, i32 0
  %156 = extractelement <13 x float> %154, i32 1
  %157 = extractelement <13 x float> %154, i32 2
  %158 = extractelement <13 x float> %154, i32 3
  %159 = extractelement <13 x float> %154, i32 4
  %160 = extractelement <13 x float> %154, i32 5
  %161 = extractelement <13 x float> %154, i32 6
  %162 = extractelement <13 x float> %154, i32 7
  %163 = extractelement <13 x float> %154, i32 8
  %164 = extractelement <13 x float> %154, i32 9
  %165 = extractelement <13 x float> %154, i32 10
  %166 = extractelement <13 x float> %154, i32 11
  %167 = extractelement <13 x float> %154, i32 12
  %array_vector78.i = insertelement <13 x float> undef, float %108, i32 0
  %array_vector79.i = insertelement <13 x float> %array_vector78.i, float %109, i32 1
  %array_vector80.i = insertelement <13 x float> %array_vector79.i, float %110, i32 2
  %array_vector81.i = insertelement <13 x float> %array_vector80.i, float %111, i32 3
  %array_vector82.i = insertelement <13 x float> %array_vector81.i, float %112, i32 4
  %array_vector83.i = insertelement <13 x float> %array_vector82.i, float %113, i32 5
  %array_vector84.i = insertelement <13 x float> %array_vector83.i, float %114, i32 6
  %array_vector85.i = insertelement <13 x float> %array_vector84.i, float %115, i32 7
  %array_vector86.i = insertelement <13 x float> %array_vector85.i, float %116, i32 8
  %array_vector87.i = insertelement <13 x float> %array_vector86.i, float %117, i32 9
  %array_vector88.i = insertelement <13 x float> %array_vector87.i, float %118, i32 10
  %array_vector89.i = insertelement <13 x float> %array_vector88.i, float %119, i32 11
  %array_vector90.i = insertelement <13 x float> %array_vector89.i, float %120, i32 12
  %168 = insertelement <13 x float> %array_vector90.i, float %137, i32 %139
  %169 = extractelement <13 x float> %168, i32 0
  %170 = extractelement <13 x float> %168, i32 1
  %171 = extractelement <13 x float> %168, i32 2
  %172 = extractelement <13 x float> %168, i32 3
  %173 = extractelement <13 x float> %168, i32 4
  %174 = extractelement <13 x float> %168, i32 5
  %175 = extractelement <13 x float> %168, i32 6
  %176 = extractelement <13 x float> %168, i32 7
  %177 = extractelement <13 x float> %168, i32 8
  %178 = extractelement <13 x float> %168, i32 9
  %179 = extractelement <13 x float> %168, i32 10
  %180 = extractelement <13 x float> %168, i32 11
  %181 = extractelement <13 x float> %168, i32 12
  %array_vector91.i = insertelement <13 x float> undef, float %122, i32 0
  %array_vector92.i = insertelement <13 x float> %array_vector91.i, float %123, i32 1
  %array_vector93.i = insertelement <13 x float> %array_vector92.i, float %124, i32 2
  %array_vector94.i = insertelement <13 x float> %array_vector93.i, float %125, i32 3
  %array_vector95.i = insertelement <13 x float> %array_vector94.i, float %126, i32 4
  %array_vector96.i = insertelement <13 x float> %array_vector95.i, float %127, i32 5
  %array_vector97.i = insertelement <13 x float> %array_vector96.i, float %128, i32 6
  %array_vector98.i = insertelement <13 x float> %array_vector97.i, float %129, i32 7
  %array_vector99.i = insertelement <13 x float> %array_vector98.i, float %130, i32 8
  %array_vector100.i = insertelement <13 x float> %array_vector99.i, float %131, i32 9
  %array_vector101.i = insertelement <13 x float> %array_vector100.i, float %132, i32 10
  %array_vector102.i = insertelement <13 x float> %array_vector101.i, float %133, i32 11
  %array_vector103.i = insertelement <13 x float> %array_vector102.i, float %134, i32 12
  %182 = insertelement <13 x float> %array_vector103.i, float %138, i32 %139
  %183 = extractelement <13 x float> %182, i32 0
  %184 = extractelement <13 x float> %182, i32 1
  %185 = extractelement <13 x float> %182, i32 2
  %186 = extractelement <13 x float> %182, i32 3
  %187 = extractelement <13 x float> %182, i32 4
  %188 = extractelement <13 x float> %182, i32 5
  %189 = extractelement <13 x float> %182, i32 6
  %190 = extractelement <13 x float> %182, i32 7
  %191 = extractelement <13 x float> %182, i32 8
  %192 = extractelement <13 x float> %182, i32 9
  %193 = extractelement <13 x float> %182, i32 10
  %194 = extractelement <13 x float> %182, i32 11
  %195 = extractelement <13 x float> %182, i32 12
  %196 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 32)
  %197 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 36)
  %198 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 40)
  %199 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 44)
  %200 = or i32 %73, 3
  %array_vector104.i = insertelement <13 x float> undef, float %141, i32 0
  %array_vector105.i = insertelement <13 x float> %array_vector104.i, float %142, i32 1
  %array_vector106.i = insertelement <13 x float> %array_vector105.i, float %143, i32 2
  %array_vector107.i = insertelement <13 x float> %array_vector106.i, float %144, i32 3
  %array_vector108.i = insertelement <13 x float> %array_vector107.i, float %145, i32 4
  %array_vector109.i = insertelement <13 x float> %array_vector108.i, float %146, i32 5
  %array_vector110.i = insertelement <13 x float> %array_vector109.i, float %147, i32 6
  %array_vector111.i = insertelement <13 x float> %array_vector110.i, float %148, i32 7
  %array_vector112.i = insertelement <13 x float> %array_vector111.i, float %149, i32 8
  %array_vector113.i = insertelement <13 x float> %array_vector112.i, float %150, i32 9
  %array_vector114.i = insertelement <13 x float> %array_vector113.i, float %151, i32 10
  %array_vector115.i = insertelement <13 x float> %array_vector114.i, float %152, i32 11
  %array_vector116.i = insertelement <13 x float> %array_vector115.i, float %153, i32 12
  %201 = insertelement <13 x float> %array_vector116.i, float %196, i32 %200
  %202 = extractelement <13 x float> %201, i32 0
  %203 = extractelement <13 x float> %201, i32 1
  %204 = extractelement <13 x float> %201, i32 2
  %205 = extractelement <13 x float> %201, i32 3
  %206 = extractelement <13 x float> %201, i32 4
  %207 = extractelement <13 x float> %201, i32 5
  %208 = extractelement <13 x float> %201, i32 6
  %209 = extractelement <13 x float> %201, i32 7
  %210 = extractelement <13 x float> %201, i32 8
  %211 = extractelement <13 x float> %201, i32 9
  %212 = extractelement <13 x float> %201, i32 10
  %213 = extractelement <13 x float> %201, i32 11
  %214 = extractelement <13 x float> %201, i32 12
  %array_vector117.i = insertelement <13 x float> undef, float %155, i32 0
  %array_vector118.i = insertelement <13 x float> %array_vector117.i, float %156, i32 1
  %array_vector119.i = insertelement <13 x float> %array_vector118.i, float %157, i32 2
  %array_vector120.i = insertelement <13 x float> %array_vector119.i, float %158, i32 3
  %array_vector121.i = insertelement <13 x float> %array_vector120.i, float %159, i32 4
  %array_vector122.i = insertelement <13 x float> %array_vector121.i, float %160, i32 5
  %array_vector123.i = insertelement <13 x float> %array_vector122.i, float %161, i32 6
  %array_vector124.i = insertelement <13 x float> %array_vector123.i, float %162, i32 7
  %array_vector125.i = insertelement <13 x float> %array_vector124.i, float %163, i32 8
  %array_vector126.i = insertelement <13 x float> %array_vector125.i, float %164, i32 9
  %array_vector127.i = insertelement <13 x float> %array_vector126.i, float %165, i32 10
  %array_vector128.i = insertelement <13 x float> %array_vector127.i, float %166, i32 11
  %array_vector129.i = insertelement <13 x float> %array_vector128.i, float %167, i32 12
  %215 = insertelement <13 x float> %array_vector129.i, float %197, i32 %200
  %216 = extractelement <13 x float> %215, i32 0
  %217 = extractelement <13 x float> %215, i32 1
  %218 = extractelement <13 x float> %215, i32 2
  %219 = extractelement <13 x float> %215, i32 3
  %220 = extractelement <13 x float> %215, i32 4
  %221 = extractelement <13 x float> %215, i32 5
  %222 = extractelement <13 x float> %215, i32 6
  %223 = extractelement <13 x float> %215, i32 7
  %224 = extractelement <13 x float> %215, i32 8
  %225 = extractelement <13 x float> %215, i32 9
  %226 = extractelement <13 x float> %215, i32 10
  %227 = extractelement <13 x float> %215, i32 11
  %228 = extractelement <13 x float> %215, i32 12
  %array_vector130.i = insertelement <13 x float> undef, float %169, i32 0
  %array_vector131.i = insertelement <13 x float> %array_vector130.i, float %170, i32 1
  %array_vector132.i = insertelement <13 x float> %array_vector131.i, float %171, i32 2
  %array_vector133.i = insertelement <13 x float> %array_vector132.i, float %172, i32 3
  %array_vector134.i = insertelement <13 x float> %array_vector133.i, float %173, i32 4
  %array_vector135.i = insertelement <13 x float> %array_vector134.i, float %174, i32 5
  %array_vector136.i = insertelement <13 x float> %array_vector135.i, float %175, i32 6
  %array_vector137.i = insertelement <13 x float> %array_vector136.i, float %176, i32 7
  %array_vector138.i = insertelement <13 x float> %array_vector137.i, float %177, i32 8
  %array_vector139.i = insertelement <13 x float> %array_vector138.i, float %178, i32 9
  %array_vector140.i = insertelement <13 x float> %array_vector139.i, float %179, i32 10
  %array_vector141.i = insertelement <13 x float> %array_vector140.i, float %180, i32 11
  %array_vector142.i = insertelement <13 x float> %array_vector141.i, float %181, i32 12
  %229 = insertelement <13 x float> %array_vector142.i, float %198, i32 %200
  %230 = extractelement <13 x float> %229, i32 0
  %231 = extractelement <13 x float> %229, i32 1
  %232 = extractelement <13 x float> %229, i32 2
  %233 = extractelement <13 x float> %229, i32 3
  %234 = extractelement <13 x float> %229, i32 4
  %235 = extractelement <13 x float> %229, i32 5
  %236 = extractelement <13 x float> %229, i32 6
  %237 = extractelement <13 x float> %229, i32 7
  %238 = extractelement <13 x float> %229, i32 8
  %239 = extractelement <13 x float> %229, i32 9
  %240 = extractelement <13 x float> %229, i32 10
  %241 = extractelement <13 x float> %229, i32 11
  %242 = extractelement <13 x float> %229, i32 12
  %array_vector143.i = insertelement <13 x float> undef, float %183, i32 0
  %array_vector144.i = insertelement <13 x float> %array_vector143.i, float %184, i32 1
  %array_vector145.i = insertelement <13 x float> %array_vector144.i, float %185, i32 2
  %array_vector146.i = insertelement <13 x float> %array_vector145.i, float %186, i32 3
  %array_vector147.i = insertelement <13 x float> %array_vector146.i, float %187, i32 4
  %array_vector148.i = insertelement <13 x float> %array_vector147.i, float %188, i32 5
  %array_vector149.i = insertelement <13 x float> %array_vector148.i, float %189, i32 6
  %array_vector150.i = insertelement <13 x float> %array_vector149.i, float %190, i32 7
  %array_vector151.i = insertelement <13 x float> %array_vector150.i, float %191, i32 8
  %array_vector152.i = insertelement <13 x float> %array_vector151.i, float %192, i32 9
  %array_vector153.i = insertelement <13 x float> %array_vector152.i, float %193, i32 10
  %array_vector154.i = insertelement <13 x float> %array_vector153.i, float %194, i32 11
  %array_vector155.i = insertelement <13 x float> %array_vector154.i, float %195, i32 12
  %243 = insertelement <13 x float> %array_vector155.i, float %199, i32 %200
  %244 = extractelement <13 x float> %243, i32 0
  %245 = extractelement <13 x float> %243, i32 1
  %246 = extractelement <13 x float> %243, i32 2
  %247 = extractelement <13 x float> %243, i32 3
  %248 = extractelement <13 x float> %243, i32 4
  %249 = extractelement <13 x float> %243, i32 5
  %250 = extractelement <13 x float> %243, i32 6
  %251 = extractelement <13 x float> %243, i32 7
  %252 = extractelement <13 x float> %243, i32 8
  %253 = extractelement <13 x float> %243, i32 9
  %254 = extractelement <13 x float> %243, i32 10
  %255 = extractelement <13 x float> %243, i32 11
  %256 = extractelement <13 x float> %243, i32 12
  %257 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 48)
  %258 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 52)
  %259 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 56)
  %260 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 60)
  %261 = add i32 %73, 4
  %array_vector156.i = insertelement <13 x float> undef, float %202, i32 0
  %array_vector157.i = insertelement <13 x float> %array_vector156.i, float %203, i32 1
  %array_vector158.i = insertelement <13 x float> %array_vector157.i, float %204, i32 2
  %array_vector159.i = insertelement <13 x float> %array_vector158.i, float %205, i32 3
  %array_vector160.i = insertelement <13 x float> %array_vector159.i, float %206, i32 4
  %array_vector161.i = insertelement <13 x float> %array_vector160.i, float %207, i32 5
  %array_vector162.i = insertelement <13 x float> %array_vector161.i, float %208, i32 6
  %array_vector163.i = insertelement <13 x float> %array_vector162.i, float %209, i32 7
  %array_vector164.i = insertelement <13 x float> %array_vector163.i, float %210, i32 8
  %array_vector165.i = insertelement <13 x float> %array_vector164.i, float %211, i32 9
  %array_vector166.i = insertelement <13 x float> %array_vector165.i, float %212, i32 10
  %array_vector167.i = insertelement <13 x float> %array_vector166.i, float %213, i32 11
  %array_vector168.i = insertelement <13 x float> %array_vector167.i, float %214, i32 12
  %262 = insertelement <13 x float> %array_vector168.i, float %257, i32 %261
  %array_vector169.i = insertelement <13 x float> undef, float %216, i32 0
  %array_vector170.i = insertelement <13 x float> %array_vector169.i, float %217, i32 1
  %array_vector171.i = insertelement <13 x float> %array_vector170.i, float %218, i32 2
  %array_vector172.i = insertelement <13 x float> %array_vector171.i, float %219, i32 3
  %array_vector173.i = insertelement <13 x float> %array_vector172.i, float %220, i32 4
  %array_vector174.i = insertelement <13 x float> %array_vector173.i, float %221, i32 5
  %array_vector175.i = insertelement <13 x float> %array_vector174.i, float %222, i32 6
  %array_vector176.i = insertelement <13 x float> %array_vector175.i, float %223, i32 7
  %array_vector177.i = insertelement <13 x float> %array_vector176.i, float %224, i32 8
  %array_vector178.i = insertelement <13 x float> %array_vector177.i, float %225, i32 9
  %array_vector179.i = insertelement <13 x float> %array_vector178.i, float %226, i32 10
  %array_vector180.i = insertelement <13 x float> %array_vector179.i, float %227, i32 11
  %array_vector181.i = insertelement <13 x float> %array_vector180.i, float %228, i32 12
  %263 = insertelement <13 x float> %array_vector181.i, float %258, i32 %261
  %array_vector182.i = insertelement <13 x float> undef, float %230, i32 0
  %array_vector183.i = insertelement <13 x float> %array_vector182.i, float %231, i32 1
  %array_vector184.i = insertelement <13 x float> %array_vector183.i, float %232, i32 2
  %array_vector185.i = insertelement <13 x float> %array_vector184.i, float %233, i32 3
  %array_vector186.i = insertelement <13 x float> %array_vector185.i, float %234, i32 4
  %array_vector187.i = insertelement <13 x float> %array_vector186.i, float %235, i32 5
  %array_vector188.i = insertelement <13 x float> %array_vector187.i, float %236, i32 6
  %array_vector189.i = insertelement <13 x float> %array_vector188.i, float %237, i32 7
  %array_vector190.i = insertelement <13 x float> %array_vector189.i, float %238, i32 8
  %array_vector191.i = insertelement <13 x float> %array_vector190.i, float %239, i32 9
  %array_vector192.i = insertelement <13 x float> %array_vector191.i, float %240, i32 10
  %array_vector193.i = insertelement <13 x float> %array_vector192.i, float %241, i32 11
  %array_vector194.i = insertelement <13 x float> %array_vector193.i, float %242, i32 12
  %264 = insertelement <13 x float> %array_vector194.i, float %259, i32 %261
  %array_vector195.i = insertelement <13 x float> undef, float %244, i32 0
  %array_vector196.i = insertelement <13 x float> %array_vector195.i, float %245, i32 1
  %array_vector197.i = insertelement <13 x float> %array_vector196.i, float %246, i32 2
  %array_vector198.i = insertelement <13 x float> %array_vector197.i, float %247, i32 3
  %array_vector199.i = insertelement <13 x float> %array_vector198.i, float %248, i32 4
  %array_vector200.i = insertelement <13 x float> %array_vector199.i, float %249, i32 5
  %array_vector201.i = insertelement <13 x float> %array_vector200.i, float %250, i32 6
  %array_vector202.i = insertelement <13 x float> %array_vector201.i, float %251, i32 7
  %array_vector203.i = insertelement <13 x float> %array_vector202.i, float %252, i32 8
  %array_vector204.i = insertelement <13 x float> %array_vector203.i, float %253, i32 9
  %array_vector205.i = insertelement <13 x float> %array_vector204.i, float %254, i32 10
  %array_vector206.i = insertelement <13 x float> %array_vector205.i, float %255, i32 11
  %array_vector207.i = insertelement <13 x float> %array_vector206.i, float %256, i32 12
  %265 = insertelement <13 x float> %array_vector207.i, float %260, i32 %261
  %266 = extractelement <13 x float> %262, i32 %139
  %267 = extractelement <13 x float> %263, i32 %139
  %268 = extractelement <13 x float> %264, i32 %139
  %269 = extractelement <13 x float> %265, i32 %139
  %270 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 80)
  %271 = bitcast float %270 to i32
  %272 = icmp eq i32 %271, 0
  %273 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %26, i32 96)
  %274 = select i1 %272, float %273, float %266
  %275 = icmp eq i32 %271, 1
  %276 = select i1 %275, float %273, float %267
  %277 = icmp eq i32 %271, 2
  %278 = select i1 %277, float %273, float %268
  %279 = icmp eq i32 %271, 3
  %280 = select i1 %279, float %273, float %269
  %281 = insertelement <13 x float> %262, float %274, i32 %139
  %282 = extractelement <13 x float> %281, i32 0
  %283 = extractelement <13 x float> %281, i32 1
  %284 = extractelement <13 x float> %281, i32 2
  %285 = extractelement <13 x float> %281, i32 3
  %286 = extractelement <13 x float> %281, i32 4
  %287 = extractelement <13 x float> %281, i32 5
  %288 = extractelement <13 x float> %281, i32 6
  %289 = extractelement <13 x float> %281, i32 7
  %290 = extractelement <13 x float> %281, i32 8
  %291 = extractelement <13 x float> %281, i32 9
  %292 = extractelement <13 x float> %281, i32 10
  %293 = extractelement <13 x float> %281, i32 11
  %294 = extractelement <13 x float> %281, i32 12
  %295 = insertelement <13 x float> %263, float %276, i32 %139
  %296 = extractelement <13 x float> %295, i32 0
  %297 = extractelement <13 x float> %295, i32 1
  %298 = extractelement <13 x float> %295, i32 2
  %299 = extractelement <13 x float> %295, i32 3
  %300 = extractelement <13 x float> %295, i32 4
  %301 = extractelement <13 x float> %295, i32 5
  %302 = extractelement <13 x float> %295, i32 6
  %303 = extractelement <13 x float> %295, i32 7
  %304 = extractelement <13 x float> %295, i32 8
  %305 = extractelement <13 x float> %295, i32 9
  %306 = extractelement <13 x float> %295, i32 10
  %307 = extractelement <13 x float> %295, i32 11
  %308 = extractelement <13 x float> %295, i32 12
  %309 = insertelement <13 x float> %264, float %278, i32 %139
  %310 = extractelement <13 x float> %309, i32 0
  %311 = extractelement <13 x float> %309, i32 1
  %312 = extractelement <13 x float> %309, i32 2
  %313 = extractelement <13 x float> %309, i32 3
  %314 = extractelement <13 x float> %309, i32 4
  %315 = extractelement <13 x float> %309, i32 5
  %316 = extractelement <13 x float> %309, i32 6
  %317 = extractelement <13 x float> %309, i32 7
  %318 = extractelement <13 x float> %309, i32 8
  %319 = extractelement <13 x float> %309, i32 9
  %320 = extractelement <13 x float> %309, i32 10
  %321 = extractelement <13 x float> %309, i32 11
  %322 = extractelement <13 x float> %309, i32 12
  %323 = insertelement <13 x float> %265, float %280, i32 %139
  %324 = extractelement <13 x float> %323, i32 0
  %325 = extractelement <13 x float> %323, i32 1
  %326 = extractelement <13 x float> %323, i32 2
  %327 = extractelement <13 x float> %323, i32 3
  %328 = extractelement <13 x float> %323, i32 4
  %329 = extractelement <13 x float> %323, i32 5
  %330 = extractelement <13 x float> %323, i32 6
  %331 = extractelement <13 x float> %323, i32 7
  %332 = extractelement <13 x float> %323, i32 8
  %333 = extractelement <13 x float> %323, i32 9
  %334 = extractelement <13 x float> %323, i32 10
  %335 = extractelement <13 x float> %323, i32 11
  %336 = extractelement <13 x float> %323, i32 12
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %282, float %296, float %310, float %324, i1 true, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %283, float %297, float %311, float %325, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %284, float %298, float %312, float %326, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 34, i32 15, float %285, float %299, float %313, float %327, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 35, i32 15, float %286, float %300, float %314, float %328, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 36, i32 15, float %287, float %301, float %315, float %329, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 37, i32 15, float %288, float %302, float %316, float %330, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 38, i32 15, float %289, float %303, float %317, float %331, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 39, i32 15, float %290, float %304, float %318, float %332, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 40, i32 15, float %291, float %305, float %319, float %333, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 41, i32 15, float %292, float %306, float %320, float %334, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 42, i32 15, float %293, float %307, float %321, float %335, i1 false, i1 false) #2, !noalias !1
  call void @llvm.amdgcn.exp.f32(i32 43, i32 15, float %294, float %308, float %322, float %336, i1 false, i1 false) #2, !noalias !1
  ret void
}

; Attribute groups referenced by number from the IR above.
; NOTE(review): uses of #0, #1 and #3 are not visible in this part of the dump;
; presumably they are attached to the intrinsic declarations / function
; definition earlier in the module -- confirm against the full listing.
attributes #0 = { nounwind readonly }
attributes #1 = { nounwind readnone }
; #2 is the group attached to the llvm.amdgcn.exp.f32 export calls at the end
; of the function.
attributes #2 = { nounwind }
attributes #3 = { "no-signed-zeros-fp-math"="true" }

; Metadata nodes.
; !0 is an empty node.
!0 = !{}
; !1 is a one-element list holding !2; it is the operand of the `!noalias`
; attachment on the llvm.amdgcn.exp.f32 calls.
!1 = !{!2}
; !2 and !5 are self-referencing distinct nodes that both point at !3 and carry
; the names "main: argument 1" / "main: argument 0"; !3 is a self-referencing
; distinct node named "main".
; NOTE(review): this matches the LangRef layout for alias scopes (!2, !5) inside
; an alias domain (!3) -- confirm against the LLVM version that produced the dump.
!2 = distinct !{!2, !3, !"main: argument 1"}
!3 = distinct !{!3, !"main"}
; !4 is a one-element list holding !5; its users are not visible in this part
; of the dump.
!4 = !{!5}
!5 = distinct !{!5, !3, !"main: argument 0"}

SHADER KEY
  part.vs.prolog.instance_divisor_is_one = 0
  part.vs.prolog.instance_divisor_is_fetched = 0
  part.vs.prolog.ls_vgpr_fix = 0
  mono.vs.fix_fetch = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
  as_es = 0
  as_ls = 0
  mono.u.vs_export_prim_id = 0
  opt.kill_outputs = 0x0
  opt.clip_disable = 0

Vertex Shader as VS:
Shader main disassembly:
	s_load_dwordx4 s[0:3], s[8:9], 0x0                    ; C00A0004 00000000
	v_add_i32_e32 v0, vcc, s10, v0                        ; 3200000A
	s_mov_b32 s7, 0x27fac                                 ; BE8700FF 00027FAC
	s_movk_i32 s6, 0xb0                                   ; B00600B0
	s_buffer_load_dword s8, s[4:7], 0x3c                  ; C0220202 0000003C
	s_waitcnt lgkmcnt(0)                                  ; BF8C007F
	buffer_load_format_xyzw v[13:16], v0, s[0:3], 0 idxen ; E00C2000 80000D00
	s_buffer_load_dword s9, s[4:7], 0x40                  ; C0220242 00000040
	s_buffer_load_dword s10, s[4:7], 0x50                 ; C0220282 00000050
	s_buffer_load_dword s11, s[4:7], 0x60                 ; C02202C2 00000060
	s_buffer_load_dword s12, s[4:7], 0x70                 ; C0220302 00000070
	s_buffer_load_dword s13, s[4:7], 0x74                 ; C0220342 00000074
	s_buffer_load_dword s14, s[4:7], 0x78                 ; C0220382 00000078
	s_buffer_load_dword s15, s[4:7], 0x7c                 ; C02203C2 0000007C
	s_buffer_load_dword s16, s[4:7], 0x80                 ; C0220402 00000080
	s_buffer_load_dword s17, s[4:7], 0x84                 ; C0220442 00000084
	s_buffer_load_dword s18, s[4:7], 0x88                 ; C0220482 00000088
	s_buffer_load_dword s19, s[4:7], 0x8c                 ; C02204C2 0000008C
	s_buffer_load_dword s20, s[4:7], 0x90                 ; C0220502 00000090
	s_buffer_load_dword s21, s[4:7], 0x94                 ; C0220542 00000094
	s_buffer_load_dword s22, s[4:7], 0x98                 ; C0220582 00000098
	s_buffer_load_dword s23, s[4:7], 0x9c                 ; C02205C2 0000009C
	s_buffer_load_dword s24, s[4:7], 0xa0                 ; C0220602 000000A0
	s_buffer_load_dword s25, s[4:7], 0xa4                 ; C0220642 000000A4
	s_buffer_load_dword s26, s[4:7], 0xa8                 ; C0220682 000000A8
	s_buffer_load_dword s27, s[4:7], 0xac                 ; C02206C2 000000AC
	s_buffer_load_dword s28, s[4:7], 0x0                  ; C0220702 00000000
	s_buffer_load_dword s29, s[4:7], 0x4                  ; C0220742 00000004
	s_buffer_load_dword s30, s[4:7], 0x8                  ; C0220782 00000008
	v_mov_b32_e32 v1, 0                                   ; 7E020280
	s_buffer_load_dword s31, s[4:7], 0xc                  ; C02207C2 0000000C
	s_buffer_load_dword s32, s[4:7], 0x10                 ; C0220802 00000010
	s_buffer_load_dword s33, s[4:7], 0x14                 ; C0220842 00000014
	s_buffer_load_dword s34, s[4:7], 0x18                 ; C0220882 00000018
	s_waitcnt lgkmcnt(0)                                  ; BF8C007F
	s_lshl_b32 s0, s9, 2                                  ; 8E008209
	s_buffer_load_dword s35, s[4:7], 0x1c                 ; C02208C2 0000001C
	s_mov_b32 m0, s0                                      ; BEFC0000
	s_buffer_load_dword s0, s[4:7], 0x28                  ; C0220002 00000028
	v_mov_b32_e32 v2, v1                                  ; 7E040301
	v_mov_b32_e32 v3, v1                                  ; 7E060301
	v_mov_b32_e32 v4, v1                                  ; 7E080301
	v_mov_b32_e32 v5, v1                                  ; 7E0A0301
	v_mov_b32_e32 v6, v1                                  ; 7E0C0301
	v_mov_b32_e32 v7, v1                                  ; 7E0E0301
	v_mov_b32_e32 v8, v1                                  ; 7E100301
	v_mov_b32_e32 v9, v1                                  ; 7E120301
	v_mov_b32_e32 v10, v1                                 ; 7E140301
	v_mov_b32_e32 v11, v1                                 ; 7E160301
	v_mov_b32_e32 v12, v1                                 ; 7E180301
	s_buffer_load_dword s36, s[4:7], 0x20                 ; C0220902 00000020
	s_buffer_load_dword s1, s[4:7], 0x2c                  ; C0220042 0000002C
	s_buffer_load_dword s37, s[4:7], 0x24                 ; C0220942 00000024
	s_buffer_load_dword s2, s[4:7], 0x30                  ; C0220082 00000030
	s_buffer_load_dword s3, s[4:7], 0x34                  ; C02200C2 00000034
	v_mov_b32_e32 v66, s30                                ; 7E84021E
	v_mov_b32_e32 v64, s28                                ; 7E80021C
	v_mov_b32_e32 v67, s31                                ; 7E86021F
	v_mov_b32_e32 v70, s34                                ; 7E8C0222
	s_buffer_load_dword s4, s[4:7], 0x38                  ; C0220102 00000038
	v_mov_b32_e32 v65, s29                                ; 7E82021D
	v_mov_b32_e32 v68, s32                                ; 7E880220
	s_waitcnt lgkmcnt(0)                                  ; BF8C007F
	v_mov_b32_e32 v71, s35                                ; 7E8E0223
	v_mov_b32_e32 v69, s33                                ; 7E8A0221
	v_mov_b32_e32 v72, s36                                ; 7E900224
	v_mov_b32_e32 v73, s37                                ; 7E920225
	v_cmp_eq_u32_e64 vcc, s10, 0                          ; D0CA006A 0001000A
	s_waitcnt vmcnt(0)                                    ; BF8C0F70
	v_mul_f32_e32 v0, s12, v13                            ; 0A001A0C
	v_mul_f32_e32 v74, s13, v13                           ; 0A941A0D
	v_mul_f32_e32 v75, s14, v13                           ; 0A961A0E
	v_mul_f32_e32 v13, s15, v13                           ; 0A1A1A0F
	v_mac_f32_e32 v0, s16, v14                            ; 2C001C10
	v_mac_f32_e32 v13, s19, v14                           ; 2C1A1C13
	v_mac_f32_e32 v0, s20, v15                            ; 2C001E14
	v_mac_f32_e32 v13, s23, v15                           ; 2C1A1E17
	v_mac_f32_e32 v75, s18, v14                           ; 2C961C12
	v_mac_f32_e32 v0, s24, v16                            ; 2C002018
	v_mac_f32_e32 v13, s27, v16                           ; 2C1A201B
	v_mov_b32_e32 v63, v15                                ; 7E7E030F
	v_mac_f32_e32 v74, s17, v14                           ; 2C941C11
	v_mac_f32_e32 v75, s22, v15                           ; 2C961E16
	v_mac_f32_e32 v74, s21, v15                           ; 2C941E15
	v_mac_f32_e32 v75, s26, v16                           ; 2C96201A
	v_mov_b32_e32 v48, v0                                 ; 7E600300
	v_mov_b32_e32 v47, v15                                ; 7E5E030F
	v_mac_f32_e32 v74, s25, v16                           ; 2C942019
	v_mov_b32_e32 v31, v15                                ; 7E3E030F
	v_mov_b32_e32 v62, v14                                ; 7E7C030E
	v_mov_b32_e32 v61, v13                                ; 7E7A030D
	v_mov_b32_e32 v60, v12                                ; 7E78030C
	v_mov_b32_e32 v59, v11                                ; 7E76030B
	v_mov_b32_e32 v58, v10                                ; 7E74030A
	v_mov_b32_e32 v57, v9                                 ; 7E720309
	v_mov_b32_e32 v56, v8                                 ; 7E700308
	v_mov_b32_e32 v55, v7                                 ; 7E6E0307
	v_mov_b32_e32 v54, v6                                 ; 7E6C0306
	v_mov_b32_e32 v53, v5                                 ; 7E6A0305
	v_mov_b32_e32 v52, v4                                 ; 7E680304
	v_mov_b32_e32 v51, v3                                 ; 7E660303
	v_mov_b32_e32 v50, v2                                 ; 7E640302
	v_mov_b32_e32 v49, v1                                 ; 7E620301
	v_mov_b32_e32 v48, v75                                ; 7E60034B
	v_movreld_b32_e32 v49, v66                            ; 7E626D42
	v_mov_b32_e32 v32, v0                                 ; 7E400300
	v_mov_b32_e32 v30, v14                                ; 7E3C030E
	v_mov_b32_e32 v29, v13                                ; 7E3A030D
	v_mov_b32_e32 v28, v12                                ; 7E38030C
	v_mov_b32_e32 v27, v11                                ; 7E36030B
	v_mov_b32_e32 v26, v10                                ; 7E34030A
	v_mov_b32_e32 v25, v9                                 ; 7E320309
	v_mov_b32_e32 v24, v8                                 ; 7E300308
	v_mov_b32_e32 v23, v7                                 ; 7E2E0307
	v_mov_b32_e32 v22, v6                                 ; 7E2C0306
	v_mov_b32_e32 v21, v5                                 ; 7E2A0305
	v_mov_b32_e32 v20, v4                                 ; 7E280304
	v_mov_b32_e32 v19, v3                                 ; 7E260303
	v_mov_b32_e32 v18, v2                                 ; 7E240302
	v_mov_b32_e32 v17, v1                                 ; 7E220301
	v_mov_b32_e32 v16, v0                                 ; 7E200300
	v_mov_b32_e32 v46, v14                                ; 7E5C030E
	v_mov_b32_e32 v45, v13                                ; 7E5A030D
	v_mov_b32_e32 v44, v12                                ; 7E58030C
	v_mov_b32_e32 v43, v11                                ; 7E56030B
	v_mov_b32_e32 v42, v10                                ; 7E54030A
	v_mov_b32_e32 v41, v9                                 ; 7E520309
	v_mov_b32_e32 v40, v8                                 ; 7E500308
	v_mov_b32_e32 v39, v7                                 ; 7E4E0307
	v_mov_b32_e32 v38, v6                                 ; 7E4C0306
	v_mov_b32_e32 v37, v5                                 ; 7E4A0305
	v_mov_b32_e32 v36, v4                                 ; 7E480304
	v_mov_b32_e32 v35, v3                                 ; 7E460303
	v_mov_b32_e32 v34, v2                                 ; 7E440302
	v_mov_b32_e32 v33, v1                                 ; 7E420301
	v_mov_b32_e32 v0, v13                                 ; 7E00030D
	v_movreld_b32_e32 v17, v64                            ; 7E226D40
	v_movreld_b32_e32 v1, v67                             ; 7E026D43
	v_mov_b32_e32 v32, v74                                ; 7E40034A
	v_movreld_b32_e32 v50, v70                            ; 7E646D46
	v_mov_b32_e32 v64, s0                                 ; 7E800200
	v_movreld_b32_e32 v33, v65                            ; 7E426D41
	v_movreld_b32_e32 v18, v68                            ; 7E246D44
	v_movreld_b32_e32 v51, v64                            ; 7E666D40
	v_movreld_b32_e32 v2, v71                             ; 7E046D47
	v_mov_b32_e32 v64, s1                                 ; 7E800201
	v_movreld_b32_e32 v34, v69                            ; 7E446D45
	v_movreld_b32_e32 v3, v64                             ; 7E066D40
	v_movreld_b32_e32 v19, v72                            ; 7E266D48
	v_mov_b32_e32 v64, s2                                 ; 7E800202
	v_movreld_b32_e32 v20, v64                            ; 7E286D40
	v_movreld_b32_e32 v35, v73                            ; 7E466D49
	v_mov_b32_e32 v64, s3                                 ; 7E800203
	v_movreld_b32_e32 v36, v64                            ; 7E486D40
	v_mov_b32_e32 v64, s4                                 ; 7E800204
	v_movreld_b32_e32 v52, v64                            ; 7E686D40
	v_mov_b32_e32 v64, s8                                 ; 7E800208
	v_movreld_b32_e32 v4, v64                             ; 7E086D40
	v_movrels_b32_e32 v64, v18                            ; 7E806F12
	v_mov_b32_e32 v65, s11                                ; 7E82020B
	v_cndmask_b32_e32 v64, v64, v65, vcc                  ; 00808340
	v_movrels_b32_e32 v66, v34                            ; 7E846F22
	v_cmp_eq_u32_e64 vcc, s10, 1                          ; D0CA006A 0001020A
	v_cndmask_b32_e32 v66, v66, v65, vcc                  ; 00848342
	v_movrels_b32_e32 v67, v50                            ; 7E866F32
	v_cmp_eq_u32_e64 vcc, s10, 2                          ; D0CA006A 0001040A
	v_cndmask_b32_e32 v67, v67, v65, vcc                  ; 00868343
	v_movrels_b32_e32 v68, v2                             ; 7E886F02
	v_cmp_eq_u32_e64 vcc, s10, 3                          ; D0CA006A 0001060A
	v_cndmask_b32_e32 v65, v68, v65, vcc                  ; 00828344
	v_movreld_b32_e32 v18, v64                            ; 7E246D40
	v_movreld_b32_e32 v34, v66                            ; 7E446D42
	v_movreld_b32_e32 v50, v67                            ; 7E646D43
	v_movreld_b32_e32 v2, v65                             ; 7E046D41
	exp pos0 v16, v32, v48, v0 done                       ; C40008CF 00302010
	exp param0 v17, v33, v49, v1                          ; C400020F 01312111
	exp param1 v18, v34, v50, v2                          ; C400021F 02322212
	exp param2 v19, v35, v51, v3                          ; C400022F 03332313
	exp param3 v20, v36, v52, v4                          ; C400023F 04342414
	exp param4 v21, v37, v53, v5                          ; C400024F 05352515
	exp param5 v22, v38, v54, v6                          ; C400025F 06362616
	exp param6 v23, v39, v55, v7                          ; C400026F 07372717
	exp param7 v24, v40, v56, v8                          ; C400027F 08382818
	exp param8 v25, v41, v57, v9                          ; C400028F 09392919
	exp param9 v26, v42, v58, v10                         ; C400029F 0A3A2A1A
	exp param10 v27, v43, v59, v11                        ; C40002AF 0B3B2B1B
	exp param11 v28, v44, v60, v12                        ; C40002BF 0C3C2C1C
	s_endpgm                                              ; BF810000

*** SHADER STATS ***
SGPRS: 96
VGPRS: 76
Spilled SGPRs: 0
Spilled VGPRs: 0
Private memory VGPRs: 0
Code Size: 972 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 3
********************


More information about the llvm-commits mailing list