[llvm] r247074 - AMDGPU/SI: Fix input vcc operand for VOP2b instructions

Wed Sep 9 00:20:57 PDT 2015

Hi Matt,

On 09.09.2015 06:15, Matt Arsenault via llvm-commits wrote:
> Author: arsenm
> Date: Tue Sep  8 16:15:00 2015
> New Revision: 247074
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=247074&view=rev
> Log:
> AMDGPU/SI: Fix input vcc operand for VOP2b instructions
> 
> Adds vcc to output string input for e32. Allows option
> of using e64 encoding with assembler.
> 
> Also fixes these instructions not implicitly reading exec.

This change broke a bunch of fp64-related piglit (graphics) tests on my
Kaveri. I'm attaching examples of good and bad shader dumps (TGSI, LLVM IR
and disassembly).

-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
-------------- next part --------------
SHADER KEY
  export_16bpc = 0x3
  last_cbuf = 0
  color_two_side = 0
  alpha_func = 7
  alpha_to_one = 0
  poly_stipple = 0
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL OUT[0], COLOR
DCL CONST[0..4]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 {    0.0000,     1.0000,     0.0000,     0.0000}
  0: DNEG TEMP[0].xy, CONST[0].zwzw
  1: DNEG TEMP[1].xy, CONST[1].xyxy
  2: DMUL TEMP[2].xy, CONST[1].xyxy, CONST[0].zwzw
  3: DNEG TEMP[2].xy, TEMP[2].xyxy
  4: DMAD TEMP[2].xy, CONST[0].xyxy, CONST[1].zwzw, TEMP[2].xyxy
  5: DRCP TEMP[3].xy, TEMP[2].xyxy
  6: DNEG TEMP[4].xy, CONST[3].xyxy
  7: DNEG TEMP[4].zw, CONST[3].zwzw
  8: DMAD TEMP[5].xy, CONST[1].zwzw, TEMP[3].xyxy, TEMP[4].xyxy
  9: DMAD TEMP[5].zw, TEMP[0].xyxy, TEMP[3].xyxy, TEMP[4].zwzw
 10: DRCP TEMP[0].xy, TEMP[2].xyxy
 11: DNEG TEMP[2].xy, CONST[4].xyxy
 12: DNEG TEMP[2].zw, CONST[4].zwzw
 13: DMAD TEMP[1].xy, TEMP[1].xyxy, TEMP[0].xyxy, TEMP[2].xyxy
 14: DMAD TEMP[1].zw, CONST[0].xyxy, TEMP[0].xyxy, TEMP[2].zwzw
 15: DMUL TEMP[0].xy, TEMP[5].xyxy, TEMP[5].xyxy
 16: DMAD TEMP[0].xy, TEMP[5].zwzw, TEMP[5].zwzw, TEMP[0].xyxy
 17: DMAD TEMP[0].xy, TEMP[1].xyxy, TEMP[1].xyxy, TEMP[0].xyxy
 18: DMAD TEMP[0].xy, TEMP[1].zwzw, TEMP[1].zwzw, TEMP[0].xyxy
 19: DMUL TEMP[1].xy, CONST[2].xyxy, CONST[2].xyxy
 20: DSGE TEMP[0].x, TEMP[1].xyxy, TEMP[0].xyxy
 21: UIF TEMP[0].xxxx :0
 22:   MOV TEMP[0], IMM[0].xyxy
 23: ELSE :0
 24:   MOV TEMP[0], IMM[0].yxxy
 25: ENDIF
 26: MOV OUT[0], TEMP[0]
 27: END
; ModuleID = 'tgsi'

define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
  %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
  %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
  %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
  %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
  %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
  %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12)
  %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
  %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
  %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
  %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
  %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
  %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
  %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
  %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
  %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
  %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
  %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
  %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
  %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
  %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76)
  %42 = bitcast float %26 to i32
  %43 = insertelement <2 x i32> undef, i32 %42, i32 0
  %44 = bitcast float %27 to i32
  %45 = insertelement <2 x i32> %43, i32 %44, i32 1
  %46 = bitcast <2 x i32> %45 to double
  %47 = bitcast float %28 to i32
  %48 = insertelement <2 x i32> undef, i32 %47, i32 0
  %49 = bitcast float %29 to i32
  %50 = insertelement <2 x i32> %48, i32 %49, i32 1
  %51 = bitcast <2 x i32> %50 to double
  %52 = bitcast float %28 to i32
  %53 = insertelement <2 x i32> undef, i32 %52, i32 0
  %54 = bitcast float %29 to i32
  %55 = insertelement <2 x i32> %53, i32 %54, i32 1
  %56 = bitcast <2 x i32> %55 to double
  %57 = bitcast float %26 to i32
  %58 = insertelement <2 x i32> undef, i32 %57, i32 0
  %59 = bitcast float %27 to i32
  %60 = insertelement <2 x i32> %58, i32 %59, i32 1
  %61 = bitcast <2 x i32> %60 to double
  %62 = fmul double %56, %61
  %63 = bitcast float %24 to i32
  %64 = insertelement <2 x i32> undef, i32 %63, i32 0
  %65 = bitcast float %25 to i32
  %66 = insertelement <2 x i32> %64, i32 %65, i32 1
  %67 = bitcast <2 x i32> %66 to double
  %68 = bitcast float %30 to i32
  %69 = insertelement <2 x i32> undef, i32 %68, i32 0
  %70 = bitcast float %31 to i32
  %71 = insertelement <2 x i32> %69, i32 %70, i32 1
  %72 = bitcast <2 x i32> %71 to double
  %73 = fmul double %67, %72
  %74 = fsub double %73, %62
  %75 = fdiv double 1.000000e+00, %74
  %76 = bitcast float %34 to i32
  %77 = insertelement <2 x i32> undef, i32 %76, i32 0
  %78 = bitcast float %35 to i32
  %79 = insertelement <2 x i32> %77, i32 %78, i32 1
  %80 = bitcast <2 x i32> %79 to double
  %81 = bitcast float %36 to i32
  %82 = insertelement <2 x i32> undef, i32 %81, i32 0
  %83 = bitcast float %37 to i32
  %84 = insertelement <2 x i32> %82, i32 %83, i32 1
  %85 = bitcast <2 x i32> %84 to double
  %86 = fsub double -0.000000e+00, %85
  %87 = bitcast float %30 to i32
  %88 = insertelement <2 x i32> undef, i32 %87, i32 0
  %89 = bitcast float %31 to i32
  %90 = insertelement <2 x i32> %88, i32 %89, i32 1
  %91 = bitcast <2 x i32> %90 to double
  %92 = fmul double %91, %75
  %93 = fsub double %92, %80
  %94 = fmul double %46, %75
  %95 = fsub double %86, %94
  %96 = fdiv double 1.000000e+00, %74
  %97 = bitcast float %38 to i32
  %98 = insertelement <2 x i32> undef, i32 %97, i32 0
  %99 = bitcast float %39 to i32
  %100 = insertelement <2 x i32> %98, i32 %99, i32 1
  %101 = bitcast <2 x i32> %100 to double
  %102 = fsub double -0.000000e+00, %101
  %103 = bitcast float %40 to i32
  %104 = insertelement <2 x i32> undef, i32 %103, i32 0
  %105 = bitcast float %41 to i32
  %106 = insertelement <2 x i32> %104, i32 %105, i32 1
  %107 = bitcast <2 x i32> %106 to double
  %108 = fmul double %51, %96
  %109 = fsub double %102, %108
  %110 = bitcast float %24 to i32
  %111 = insertelement <2 x i32> undef, i32 %110, i32 0
  %112 = bitcast float %25 to i32
  %113 = insertelement <2 x i32> %111, i32 %112, i32 1
  %114 = bitcast <2 x i32> %113 to double
  %115 = fmul double %114, %96
  %116 = fsub double %115, %107
  %117 = fmul double %93, %93
  %118 = fmul double %95, %95
  %119 = fadd double %118, %117
  %120 = fmul double %109, %109
  %121 = fadd double %120, %119
  %122 = fmul double %116, %116
  %123 = fadd double %122, %121
  %124 = bitcast float %32 to i32
  %125 = insertelement <2 x i32> undef, i32 %124, i32 0
  %126 = bitcast float %33 to i32
  %127 = insertelement <2 x i32> %125, i32 %126, i32 1
  %128 = bitcast <2 x i32> %127 to double
  %129 = bitcast float %32 to i32
  %130 = insertelement <2 x i32> undef, i32 %129, i32 0
  %131 = bitcast float %33 to i32
  %132 = insertelement <2 x i32> %130, i32 %131, i32 1
  %133 = bitcast <2 x i32> %132 to double
  %134 = fmul double %128, %133
  %135 = fcmp oge double %134, %123
  %. = select i1 %135, float 1.000000e+00, float 0.000000e+00
  %.24 = select i1 %135, float 0.000000e+00, float 1.000000e+00
  %136 = call i32 @llvm.SI.packf16(float %.24, float %.)
  %137 = bitcast i32 %136 to float
  %138 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 1.000000e+00)
  %139 = bitcast i32 %138 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %137, float %139, float %137, float %139)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }

!0 = !{!"const", null, i32 1}

Shader Disassembly:

	s_load_dwordx4 s[0:3], s[2:3], 0x0                    ; C0800300
	s_waitcnt lgkmcnt(0)                                  ; BF8C007F
	s_buffer_load_dword s4, s[0:3], 0x0                   ; C2020100
	s_buffer_load_dword s5, s[0:3], 0x1                   ; C2028101
	s_buffer_load_dword s6, s[0:3], 0x2                   ; C2030102
	s_buffer_load_dword s7, s[0:3], 0x3                   ; C2038103
	s_buffer_load_dword s8, s[0:3], 0x4                   ; C2040104
	s_buffer_load_dword s9, s[0:3], 0x5                   ; C2048105
	s_buffer_load_dword s10, s[0:3], 0x6                  ; C2050106
	s_buffer_load_dword s11, s[0:3], 0x7                  ; C2058107
	s_buffer_load_dword s12, s[0:3], 0x8                  ; C2060108
	s_buffer_load_dword s13, s[0:3], 0x9                  ; C2068109
	s_waitcnt lgkmcnt(0)                                  ; BF8C007F
	v_mov_b32_e32 v0, s4                                  ; 7E000204
	s_buffer_load_dword s4, s[0:3], 0xc                   ; C202010C
	v_mov_b32_e32 v1, s5                                  ; 7E020205
	s_buffer_load_dword s5, s[0:3], 0xd                   ; C202810D
	s_buffer_load_dword s14, s[0:3], 0xe                  ; C207010E
	s_buffer_load_dword s15, s[0:3], 0xf                  ; C207810F
	s_buffer_load_dword s16, s[0:3], 0x10                 ; C2080110
	v_mov_b32_e32 v2, s6                                  ; 7E040206
	v_mov_b32_e32 v4, s8                                  ; 7E080208
	v_mov_b32_e32 v6, s10                                 ; 7E0C020A
	v_mov_b32_e32 v3, s7                                  ; 7E060207
	v_mov_b32_e32 v5, s9                                  ; 7E0A0209
	v_mov_b32_e32 v7, s11                                 ; 7E0E020B
	v_mul_f64 v[8:9], v[4:5], v[2:3]                      ; D2CA0008 00020504
	v_mul_f64 v[10:11], v[0:1], v[6:7]                    ; D2CA000A 00020D00
	v_add_f64 v[8:9], v[10:11], -v[8:9]                   ; D2C80008 4002110A
	; NOTE(review): fp64 division sequence (div_scale / rcp / fma refinement /
	; div_fmas / div_fixup); it appears to implement the `fdiv double 1.0, %74`
	; from the IR above.
	; In both v_div_scale_f64 lines the second encoding dword has zero in
	; bits [26:18] (00021108, 000210F2) although the printed third source is 1.0.
	; v_div_fixup_f64 at the end of this sequence carries 0xF2 in those bits for
	; its 1.0 (03CA110A), and the other dump in this message shows
	; 03CA1108 / 03CA10F2 for the same two v_div_scale_f64 instructions.
	; Presumably this listing is the broken output - TODO confirm against the
	; SI ISA VOP3b layout (SRC2 field, inline constant 1.0).
	v_div_scale_f64 v[10:11], s[6:7], v[8:9], v[8:9], 1.0 ; D2DC060A 00021108
	v_rcp_f64_e32 v[12:13], v[10:11]                      ; 7E185F0A
	v_div_scale_f64 v[14:15], vcc, 1.0, v[8:9], 1.0       ; D2DC6A0E 000210F2
	v_fma_f64 v[16:17], -v[10:11], v[12:13], 1.0          ; D2980010 23CA190A
	v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]      ; D298000C 0432210C
	v_fma_f64 v[16:17], -v[10:11], v[12:13], 1.0          ; D2980010 23CA190A
	v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]      ; D298000C 0432210C
	v_mul_f64 v[16:17], v[14:15], v[12:13]                ; D2CA0010 0002190E
	v_fma_f64 v[10:11], -v[10:11], v[16:17], v[14:15]     ; D298000A 243A210A
	v_div_fmas_f64 v[10:11], v[10:11], v[12:13], v[16:17] ; D2E0000A 0442190A
	v_div_fixup_f64 v[8:9], v[10:11], v[8:9], 1.0         ; D2C00008 03CA110A
	v_mul_f64 v[6:7], v[6:7], v[8:9]                      ; D2CA0006 00021106
	v_mul_f64 v[2:3], v[2:3], v[8:9]                      ; D2CA0002 00021102
	s_buffer_load_dword s6, s[0:3], 0x11                  ; C2030111
	s_waitcnt lgkmcnt(0)                                  ; BF8C007F
	v_mov_b32_e32 v10, s4                                 ; 7E140204
	v_mov_b32_e32 v11, s5                                 ; 7E160205
	v_mul_f64 v[4:5], v[4:5], v[8:9]                      ; D2CA0004 00021104
	v_mov_b32_e32 v12, s14                                ; 7E18020E
	v_add_f64 v[6:7], v[6:7], -v[10:11]                   ; D2C80006 40021506
	v_mov_b32_e32 v13, s15                                ; 7E1A020F
	v_add_f64 v[2:3], -v[12:13], -v[2:3]                  ; D2C80002 6002050C
	s_buffer_load_dword s4, s[0:3], 0x12                  ; C2020112
	s_buffer_load_dword s0, s[0:3], 0x13                  ; C2000113
	v_mov_b32_e32 v10, s16                                ; 7E140210
	v_mov_b32_e32 v11, s6                                 ; 7E160206
	v_mul_f64 v[0:1], v[0:1], v[8:9]                      ; D2CA0000 00021100
	v_add_f64 v[4:5], -v[10:11], -v[4:5]                  ; D2C80004 6002090A
	v_mul_f64 v[6:7], v[6:7], v[6:7]                      ; D2CA0006 00020D06
	v_mul_f64 v[2:3], v[2:3], v[2:3]                      ; D2CA0002 00020502
	s_waitcnt lgkmcnt(0)                                  ; BF8C007F
	v_mov_b32_e32 v8, s4                                  ; 7E100204
	v_mov_b32_e32 v9, s0                                  ; 7E120200
	v_add_f64 v[0:1], v[0:1], -v[8:9]                     ; D2C80000 40021100
	v_mul_f64 v[4:5], v[4:5], v[4:5]                      ; D2CA0004 00020904
	v_add_f64 v[2:3], v[2:3], v[6:7]                      ; D2C80002 00020D02
	v_mul_f64 v[0:1], v[0:1], v[0:1]                      ; D2CA0000 00020100
	v_add_f64 v[2:3], v[4:5], v[2:3]                      ; D2C80002 00020504
	v_mov_b32_e32 v4, s12                                 ; 7E08020C
	v_mov_b32_e32 v5, s13                                 ; 7E0A020D
	v_mul_f64 v[4:5], v[4:5], v[4:5]                      ; D2CA0004 00020904
	v_add_f64 v[0:1], v[0:1], v[2:3]                      ; D2C80000 00020500
	v_cmp_ge_f64_e32 vcc, v[4:5], v[0:1]                  ; 7C4C0104
	v_cndmask_b32_e64 v0, 0, 1.0, vcc                     ; D2000000 01A9E480
	v_cndmask_b32_e64 v1, 1.0, 0, vcc                     ; D2000001 01A900F2
	v_cvt_pkrtz_f16_f32_e32 v0, v1, v0                    ; 5E000101
	v_cvt_pkrtz_f16_f32_e64 v1, 0, 1.0                    ; D25E0001 0001E480
	exp 15, 0, 1, 1, 1, v0, v1, v0, v1                    ; F8001C0F 01000100
	s_endpgm                                              ; BF810000

*** SHADER STATS ***
SGPRS: 24
VGPRS: 20
Code Size: 444 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
  export_16bpc = 0x3
  last_cbuf = 0
  color_two_side = 0
  alpha_func = 7
  alpha_to_one = 0
  poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
  %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5)
  %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5)
  %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5)
  %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5)
  %26 = call i32 @llvm.SI.packf16(float %22, float %23)
  %27 = bitcast i32 %26 to float
  %28 = call i32 @llvm.SI.packf16(float %24, float %25)
  %29 = bitcast i32 %28 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }

Shader Disassembly:

	s_mov_b32 m0, s9                    ; BEFC0309
	v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002
	v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102
	v_cvt_pkrtz_f16_f32_e32 v0, v0, v1  ; 5E000300
	v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202
	v_interp_mov_f32 v2, P0, 3, 0, [m0] ; C80A0302
	v_cvt_pkrtz_f16_f32_e32 v1, v1, v2  ; 5E020501
	exp 15, 0, 1, 1, 1, v0, v1, v0, v1  ; F8001C0F 01000100
	s_endpgm                            ; BF810000

*** SHADER STATS ***
SGPRS: 16
VGPRS: 4
Code Size: 40 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
  export_16bpc = 0x0
  last_cbuf = 0
  color_two_side = 0
  alpha_func = 7
  alpha_to_one = 0
  poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
  0: TEX OUT[0], IN[0], SAMP[0], 2D
  1: END
; ModuleID = 'tgsi'

define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
  %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
  %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0
  %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
  %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0
  %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11)
  %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11)
  %28 = bitcast float %26 to i32
  %29 = bitcast float %27 to i32
  %30 = insertelement <2 x i32> undef, i32 %28, i32 0
  %31 = insertelement <2 x i32> %30, i32 %29, i32 1
  %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2)
  %33 = extractelement <4 x float> %32, i32 0
  %34 = extractelement <4 x float> %32, i32 1
  %35 = extractelement <4 x float> %32, i32 2
  %36 = extractelement <4 x float> %32, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %33, float %34, float %35, float %36)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }

!0 = !{!"const", null, i32 1}

Shader Disassembly:

	s_wqm_b64 exec, exec                                                   ; BEFE0A7E
	s_load_dwordx8 s[12:19], s[6:7], 0x0                                   ; C0C60700
	s_load_dwordx4 s[0:3], s[4:5], 0x0                                     ; C0800500
	s_mov_b32 m0, s9                                                       ; BEFC0309
	v_interp_p1_f32 v2, v0, 0, 0, [m0]                                     ; C8080000
	v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0]                               ; C8090001
	v_interp_p1_f32 v3, v0, 1, 0, [m0]                                     ; C80C0100
	v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0]                               ; C80D0101
	s_waitcnt lgkmcnt(0)                                                   ; BF8C007F
	image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002
	s_waitcnt vmcnt(0)                                                     ; BF8C0770
	exp 15, 0, 0, 1, 1, v0, v1, v2, v3                                     ; F800180F 03020100
	s_endpgm                                                               ; BF810000

*** SHADER STATS ***
SGPRS: 24
VGPRS: 4
Code Size: 60 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
-------------- next part --------------
SHADER KEY
  export_16bpc = 0x3
  last_cbuf = 0
  color_two_side = 0
  alpha_func = 7
  alpha_to_one = 0
  poly_stipple = 0
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL OUT[0], COLOR
DCL CONST[0..4]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 {    0.0000,     1.0000,     0.0000,     0.0000}
  0: DNEG TEMP[0].xy, CONST[0].zwzw
  1: DNEG TEMP[1].xy, CONST[1].xyxy
  2: DMUL TEMP[2].xy, CONST[1].xyxy, CONST[0].zwzw
  3: DNEG TEMP[2].xy, TEMP[2].xyxy
  4: DMAD TEMP[2].xy, CONST[0].xyxy, CONST[1].zwzw, TEMP[2].xyxy
  5: DRCP TEMP[3].xy, TEMP[2].xyxy
  6: DNEG TEMP[4].xy, CONST[3].xyxy
  7: DNEG TEMP[4].zw, CONST[3].zwzw
  8: DMAD TEMP[5].xy, CONST[1].zwzw, TEMP[3].xyxy, TEMP[4].xyxy
  9: DMAD TEMP[5].zw, TEMP[0].xyxy, TEMP[3].xyxy, TEMP[4].zwzw
 10: DRCP TEMP[0].xy, TEMP[2].xyxy
 11: DNEG TEMP[2].xy, CONST[4].xyxy
 12: DNEG TEMP[2].zw, CONST[4].zwzw
 13: DMAD TEMP[1].xy, TEMP[1].xyxy, TEMP[0].xyxy, TEMP[2].xyxy
 14: DMAD TEMP[1].zw, CONST[0].xyxy, TEMP[0].xyxy, TEMP[2].zwzw
 15: DMUL TEMP[0].xy, TEMP[5].xyxy, TEMP[5].xyxy
 16: DMAD TEMP[0].xy, TEMP[5].zwzw, TEMP[5].zwzw, TEMP[0].xyxy
 17: DMAD TEMP[0].xy, TEMP[1].xyxy, TEMP[1].xyxy, TEMP[0].xyxy
 18: DMAD TEMP[0].xy, TEMP[1].zwzw, TEMP[1].zwzw, TEMP[0].xyxy
 19: DMUL TEMP[1].xy, CONST[2].xyxy, CONST[2].xyxy
 20: DSGE TEMP[0].x, TEMP[1].xyxy, TEMP[0].xyxy
 21: UIF TEMP[0].xxxx :0
 22:   MOV TEMP[0], IMM[0].xyxy
 23: ELSE :0
 24:   MOV TEMP[0], IMM[0].yxxy
 25: ENDIF
 26: MOV OUT[0], TEMP[0]
 27: END
; ModuleID = 'tgsi'

define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
  %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
  %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
  %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
  %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
  %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
  %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12)
  %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
  %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
  %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
  %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
  %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
  %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
  %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
  %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
  %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
  %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
  %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
  %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
  %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
  %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76)
  %42 = bitcast float %26 to i32
  %43 = insertelement <2 x i32> undef, i32 %42, i32 0
  %44 = bitcast float %27 to i32
  %45 = insertelement <2 x i32> %43, i32 %44, i32 1
  %46 = bitcast <2 x i32> %45 to double
  %47 = bitcast float %28 to i32
  %48 = insertelement <2 x i32> undef, i32 %47, i32 0
  %49 = bitcast float %29 to i32
  %50 = insertelement <2 x i32> %48, i32 %49, i32 1
  %51 = bitcast <2 x i32> %50 to double
  %52 = bitcast float %28 to i32
  %53 = insertelement <2 x i32> undef, i32 %52, i32 0
  %54 = bitcast float %29 to i32
  %55 = insertelement <2 x i32> %53, i32 %54, i32 1
  %56 = bitcast <2 x i32> %55 to double
  %57 = bitcast float %26 to i32
  %58 = insertelement <2 x i32> undef, i32 %57, i32 0
  %59 = bitcast float %27 to i32
  %60 = insertelement <2 x i32> %58, i32 %59, i32 1
  %61 = bitcast <2 x i32> %60 to double
  %62 = fmul double %56, %61
  %63 = bitcast float %24 to i32
  %64 = insertelement <2 x i32> undef, i32 %63, i32 0
  %65 = bitcast float %25 to i32
  %66 = insertelement <2 x i32> %64, i32 %65, i32 1
  %67 = bitcast <2 x i32> %66 to double
  %68 = bitcast float %30 to i32
  %69 = insertelement <2 x i32> undef, i32 %68, i32 0
  %70 = bitcast float %31 to i32
  %71 = insertelement <2 x i32> %69, i32 %70, i32 1
  %72 = bitcast <2 x i32> %71 to double
  %73 = fmul double %67, %72
  %74 = fsub double %73, %62
  %75 = fdiv double 1.000000e+00, %74
  %76 = bitcast float %34 to i32
  %77 = insertelement <2 x i32> undef, i32 %76, i32 0
  %78 = bitcast float %35 to i32
  %79 = insertelement <2 x i32> %77, i32 %78, i32 1
  %80 = bitcast <2 x i32> %79 to double
  %81 = bitcast float %36 to i32
  %82 = insertelement <2 x i32> undef, i32 %81, i32 0
  %83 = bitcast float %37 to i32
  %84 = insertelement <2 x i32> %82, i32 %83, i32 1
  %85 = bitcast <2 x i32> %84 to double
  %86 = fsub double -0.000000e+00, %85
  %87 = bitcast float %30 to i32
  %88 = insertelement <2 x i32> undef, i32 %87, i32 0
  %89 = bitcast float %31 to i32
  %90 = insertelement <2 x i32> %88, i32 %89, i32 1
  %91 = bitcast <2 x i32> %90 to double
  %92 = fmul double %91, %75
  %93 = fsub double %92, %80
  %94 = fmul double %46, %75
  %95 = fsub double %86, %94
  %96 = fdiv double 1.000000e+00, %74
  %97 = bitcast float %38 to i32
  %98 = insertelement <2 x i32> undef, i32 %97, i32 0
  %99 = bitcast float %39 to i32
  %100 = insertelement <2 x i32> %98, i32 %99, i32 1
  %101 = bitcast <2 x i32> %100 to double
  %102 = fsub double -0.000000e+00, %101
  %103 = bitcast float %40 to i32
  %104 = insertelement <2 x i32> undef, i32 %103, i32 0
  %105 = bitcast float %41 to i32
  %106 = insertelement <2 x i32> %104, i32 %105, i32 1
  %107 = bitcast <2 x i32> %106 to double
  %108 = fmul double %51, %96
  %109 = fsub double %102, %108
  %110 = bitcast float %24 to i32
  %111 = insertelement <2 x i32> undef, i32 %110, i32 0
  %112 = bitcast float %25 to i32
  %113 = insertelement <2 x i32> %111, i32 %112, i32 1
  %114 = bitcast <2 x i32> %113 to double
  %115 = fmul double %114, %96
  %116 = fsub double %115, %107
  %117 = fmul double %93, %93
  %118 = fmul double %95, %95
  %119 = fadd double %118, %117
  %120 = fmul double %109, %109
  %121 = fadd double %120, %119
  %122 = fmul double %116, %116
  %123 = fadd double %122, %121
  %124 = bitcast float %32 to i32
  %125 = insertelement <2 x i32> undef, i32 %124, i32 0
  %126 = bitcast float %33 to i32
  %127 = insertelement <2 x i32> %125, i32 %126, i32 1
  %128 = bitcast <2 x i32> %127 to double
  %129 = bitcast float %32 to i32
  %130 = insertelement <2 x i32> undef, i32 %129, i32 0
  %131 = bitcast float %33 to i32
  %132 = insertelement <2 x i32> %130, i32 %131, i32 1
  %133 = bitcast <2 x i32> %132 to double
  %134 = fmul double %128, %133
  %135 = fcmp oge double %134, %123
  %. = select i1 %135, float 1.000000e+00, float 0.000000e+00
  %.24 = select i1 %135, float 0.000000e+00, float 1.000000e+00
  %136 = call i32 @llvm.SI.packf16(float %.24, float %.)
  %137 = bitcast i32 %136 to float
  %138 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 1.000000e+00)
  %139 = bitcast i32 %138 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %137, float %139, float %137, float %139)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }

!0 = !{!"const", null, i32 1}

Shader Disassembly:

	s_load_dwordx4 s[0:3], s[2:3], 0x0                    ; C0800300
	s_waitcnt lgkmcnt(0)                                  ; BF8C007F
	s_buffer_load_dword s4, s[0:3], 0x0                   ; C2020100
	s_buffer_load_dword s5, s[0:3], 0x1                   ; C2028101
	s_buffer_load_dword s6, s[0:3], 0x2                   ; C2030102
	s_buffer_load_dword s7, s[0:3], 0x3                   ; C2038103
	s_buffer_load_dword s8, s[0:3], 0x4                   ; C2040104
	s_buffer_load_dword s9, s[0:3], 0x5                   ; C2048105
	s_buffer_load_dword s10, s[0:3], 0x6                  ; C2050106
	s_buffer_load_dword s11, s[0:3], 0x7                  ; C2058107
	s_buffer_load_dword s12, s[0:3], 0x8                  ; C2060108
	s_buffer_load_dword s13, s[0:3], 0x9                  ; C2068109
	s_waitcnt lgkmcnt(0)                                  ; BF8C007F
	v_mov_b32_e32 v0, s4                                  ; 7E000204
	s_buffer_load_dword s4, s[0:3], 0xc                   ; C202010C
	v_mov_b32_e32 v1, s5                                  ; 7E020205
	s_buffer_load_dword s5, s[0:3], 0xd                   ; C202810D
	s_buffer_load_dword s14, s[0:3], 0xe                  ; C207010E
	s_buffer_load_dword s15, s[0:3], 0xf                  ; C207810F
	s_buffer_load_dword s16, s[0:3], 0x10                 ; C2080110
	v_mov_b32_e32 v2, s6                                  ; 7E040206
	v_mov_b32_e32 v4, s8                                  ; 7E080208
	v_mov_b32_e32 v6, s10                                 ; 7E0C020A
	v_mov_b32_e32 v3, s7                                  ; 7E060207
	v_mov_b32_e32 v5, s9                                  ; 7E0A0209
	v_mov_b32_e32 v7, s11                                 ; 7E0E020B
	v_mul_f64 v[8:9], v[4:5], v[2:3]                      ; D2CA0008 00020504
	v_mul_f64 v[10:11], v[0:1], v[6:7]                    ; D2CA000A 00020D00
	v_add_f64 v[8:9], v[10:11], -v[8:9]                   ; D2C80008 4002110A
	; NOTE(review): same fp64 division sequence as in the first dump of this
	; message (div_scale / rcp / fma refinement / div_fmas / div_fixup).
	; Here both v_div_scale_f64 encodings carry 0xF2 in bits [26:18] of the
	; second dword (03CA1108, 03CA10F2), matching the printed 1.0 third source
	; and the v_div_fixup_f64 encoding at the end of this sequence (03CA110A).
	; The first dump has 00021108 / 000210F2 for these two instructions; every
	; other encoding in this sequence is the same in both dumps.
	; Presumably this listing is the working output - TODO confirm against the
	; SI ISA VOP3b layout (SRC2 field, inline constant 1.0).
	v_div_scale_f64 v[10:11], s[6:7], v[8:9], v[8:9], 1.0 ; D2DC060A 03CA1108
	v_rcp_f64_e32 v[12:13], v[10:11]                      ; 7E185F0A
	v_div_scale_f64 v[14:15], vcc, 1.0, v[8:9], 1.0       ; D2DC6A0E 03CA10F2
	v_fma_f64 v[16:17], -v[10:11], v[12:13], 1.0          ; D2980010 23CA190A
	v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]      ; D298000C 0432210C
	v_fma_f64 v[16:17], -v[10:11], v[12:13], 1.0          ; D2980010 23CA190A
	v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]      ; D298000C 0432210C
	v_mul_f64 v[16:17], v[14:15], v[12:13]                ; D2CA0010 0002190E
	v_fma_f64 v[10:11], -v[10:11], v[16:17], v[14:15]     ; D298000A 243A210A
	v_div_fmas_f64 v[10:11], v[10:11], v[12:13], v[16:17] ; D2E0000A 0442190A
	v_div_fixup_f64 v[8:9], v[10:11], v[8:9], 1.0         ; D2C00008 03CA110A
	v_mul_f64 v[6:7], v[6:7], v[8:9]                      ; D2CA0006 00021106
	v_mul_f64 v[2:3], v[2:3], v[8:9]                      ; D2CA0002 00021102
	s_buffer_load_dword s6, s[0:3], 0x11                  ; C2030111
	s_waitcnt lgkmcnt(0)                                  ; BF8C007F
	v_mov_b32_e32 v10, s4                                 ; 7E140204
	v_mov_b32_e32 v11, s5                                 ; 7E160205
	v_mul_f64 v[4:5], v[4:5], v[8:9]                      ; D2CA0004 00021104
	v_mov_b32_e32 v12, s14                                ; 7E18020E
	v_add_f64 v[6:7], v[6:7], -v[10:11]                   ; D2C80006 40021506
	v_mov_b32_e32 v13, s15                                ; 7E1A020F
	v_add_f64 v[2:3], -v[12:13], -v[2:3]                  ; D2C80002 6002050C
	s_buffer_load_dword s4, s[0:3], 0x12                  ; C2020112
	s_buffer_load_dword s0, s[0:3], 0x13                  ; C2000113
	v_mov_b32_e32 v10, s16                                ; 7E140210
	v_mov_b32_e32 v11, s6                                 ; 7E160206
	v_mul_f64 v[0:1], v[0:1], v[8:9]                      ; D2CA0000 00021100
	v_add_f64 v[4:5], -v[10:11], -v[4:5]                  ; D2C80004 6002090A
	v_mul_f64 v[6:7], v[6:7], v[6:7]                      ; D2CA0006 00020D06
	v_mul_f64 v[2:3], v[2:3], v[2:3]                      ; D2CA0002 00020502
	s_waitcnt lgkmcnt(0)                                  ; BF8C007F
	v_mov_b32_e32 v8, s4                                  ; 7E100204
	v_mov_b32_e32 v9, s0                                  ; 7E120200
	v_add_f64 v[0:1], v[0:1], -v[8:9]                     ; D2C80000 40021100
	v_mul_f64 v[4:5], v[4:5], v[4:5]                      ; D2CA0004 00020904
	v_add_f64 v[2:3], v[2:3], v[6:7]                      ; D2C80002 00020D02
	v_mul_f64 v[0:1], v[0:1], v[0:1]                      ; D2CA0000 00020100
	v_add_f64 v[2:3], v[4:5], v[2:3]                      ; D2C80002 00020504
	v_mov_b32_e32 v4, s12                                 ; 7E08020C
	v_mov_b32_e32 v5, s13                                 ; 7E0A020D
	v_mul_f64 v[4:5], v[4:5], v[4:5]                      ; D2CA0004 00020904
	v_add_f64 v[0:1], v[0:1], v[2:3]                      ; D2C80000 00020500
	v_cmp_ge_f64_e32 vcc, v[4:5], v[0:1]                  ; 7C4C0104
	v_cndmask_b32_e64 v0, 0, 1.0, vcc                     ; D2000000 01A9E480
	v_cndmask_b32_e64 v1, 1.0, 0, vcc                     ; D2000001 01A900F2
	v_cvt_pkrtz_f16_f32_e32 v0, v1, v0                    ; 5E000101
	v_cvt_pkrtz_f16_f32_e64 v1, 0, 1.0                    ; D25E0001 0001E480
	exp 15, 0, 1, 1, 1, v0, v1, v0, v1                    ; F8001C0F 01000100
	s_endpgm                                              ; BF810000

*** SHADER STATS ***
SGPRS: 24
VGPRS: 20
Code Size: 444 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
  export_16bpc = 0x3
  last_cbuf = 0
  color_two_side = 0
  alpha_func = 7
  alpha_to_one = 0
  poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

; Fragment shader entry point: makes four llvm.SI.fs.constant calls, packs the
; results pairwise with llvm.SI.packf16, and hands the two packed words to
; llvm.SI.export.
; NOTE(review): presumably the lowering of the TGSI listing printed just before
; this module (MOV OUT[0], IN[0] with a CONSTANT input) - confirm.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
  ; Four calls that differ only in the first argument (0, 1, 2, 3); the second
  ; argument is always 0 and the third is always %5, the unnamed `i32 inreg`
  ; parameter.
  %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5)
  %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5)
  %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5)
  %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5)
  ; Pack (%22, %23) and (%24, %25). packf16 yields an i32, so each result is
  ; bitcast to float before being used as a float operand of the export call.
  %26 = call i32 @llvm.SI.packf16(float %22, float %23)
  %27 = bitcast i32 %26 to float
  %28 = call i32 @llvm.SI.packf16(float %24, float %25)
  %29 = bitcast i32 %28 to float
  ; The two packed values are passed twice, in the order %27, %29, %27, %29.
  ; NOTE(review): the fifth i32 argument is 1 here; presumably it marks the
  ; export as carrying packed 16-bit data - confirm against the intrinsic's
  ; definition.
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }

Shader Disassembly:

	; Straight-line shader: four v_interp_mov_f32 reads, two
	; v_cvt_pkrtz_f16_f32 packs, one exp, then s_endpgm.
	; NOTE(review): presumably the machine code generated for the LLVM IR
	; module dumped immediately before this listing - confirm.
	;
	; m0 is written once from s9 and is the bracketed [m0] operand of every
	; v_interp_mov_f32 below.
	s_mov_b32 m0, s9                    ; BEFC0309
	; The second numeric operand steps 0, 1, 2, 3 across the four
	; v_interp_mov_f32 instructions; the third stays 0.
	v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002
	v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102
	; First pack: v0 <- (v0, v1). v1 is free afterwards and is reused.
	v_cvt_pkrtz_f16_f32_e32 v0, v0, v1  ; 5E000300
	v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202
	v_interp_mov_f32 v2, P0, 3, 0, [m0] ; C80A0302
	; Second pack: v1 <- (v1, v2).
	v_cvt_pkrtz_f16_f32_e32 v1, v1, v2  ; 5E020501
	; The export sources are the two packed registers repeated: v0, v1, v0, v1.
	exp 15, 0, 1, 1, 1, v0, v1, v0, v1  ; F8001C0F 01000100
	s_endpgm                            ; BF810000

*** SHADER STATS ***
SGPRS: 16
VGPRS: 4
Code Size: 40 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
  export_16bpc = 0x0
  last_cbuf = 0
  color_two_side = 0
  alpha_func = 7
  alpha_to_one = 0
  poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
  0: TEX OUT[0], IN[0], SAMP[0], 2D
  1: END
; ModuleID = 'tgsi'

; Fragment shader entry point: loads one <32 x i8> through pointer parameter %3
; and one <16 x i8> through pointer parameter %2, calls llvm.SI.fs.interp
; twice, feeds the results to llvm.SI.sample.v2i32, and passes the four
; returned floats to llvm.SI.export.
; NOTE(review): presumably the lowering of the TGSI listing printed just before
; this module (TEX OUT[0], IN[0], SAMP[0], 2D) - confirm.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
  ; Each bitcast reinterprets an array pointer so that the following load reads
  ; from the start of that array: 32 bytes via %3, 16 bytes via %2.
  ; NOTE(review): presumably the texture resource and sampler descriptors for
  ; unit 0 - confirm.
  %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
  %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0
  %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
  %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0
  ; Two interp calls that differ only in the first argument (0 and 1); both
  ; take %5 (the `i32 inreg` parameter) and %11 (a <2 x i32> parameter).
  %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11)
  %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11)
  ; The float results are bitcast to i32 and assembled, in order, into the
  ; <2 x i32> that is the first operand of the sample call.
  %28 = bitcast float %26 to i32
  %29 = bitcast float %27 to i32
  %30 = insertelement <2 x i32> undef, i32 %28, i32 0
  %31 = insertelement <2 x i32> %30, i32 %29, i32 1
  ; The sample call takes the <2 x i32> together with both loaded vectors.
  %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2)
  ; Unpack elements 0..3 and export them unchanged, in the same order.
  %33 = extractelement <4 x float> %32, i32 0
  %34 = extractelement <4 x float> %32, i32 1
  %35 = extractelement <4 x float> %32, i32 2
  %36 = extractelement <4 x float> %32, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %33, float %34, float %35, float %36)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }

!0 = !{!"const", null, i32 1}

Shader Disassembly:

	; Straight-line shader: two scalar loads, two v_interp_p1/p2 pairs, one
	; image_sample, one exp, then s_endpgm.
	; NOTE(review): presumably the machine code generated for the LLVM IR
	; module dumped immediately before this listing - confirm.
	;
	; NOTE(review): s_wqm_b64 rewrites exec from its own value; presumably this
	; enables whole-quad mode for the image_sample below - confirm against the
	; ISA manual.
	s_wqm_b64 exec, exec                                                   ; BEFE0A7E
	; s[12:19] and s[0:3] are loaded here (both at offset 0x0) and are the last
	; two operands of image_sample.
	s_load_dwordx8 s[12:19], s[6:7], 0x0                                   ; C0C60700
	s_load_dwordx4 s[0:3], s[4:5], 0x0                                     ; C0800500
	; m0 is written once from s9 and is the bracketed [m0] operand of every
	; v_interp_* instruction below.
	s_mov_b32 m0, s9                                                       ; BEFC0309
	; v2 and v3 are each produced by a p1/p2 pair: p1 reads v0, p2 reads v1
	; plus the p1 result. The two pairs differ only in the first numeric
	; operand (0 for v2, 1 for v3).
	v_interp_p1_f32 v2, v0, 0, 0, [m0]                                     ; C8080000
	v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0]                               ; C8090001
	v_interp_p1_f32 v3, v0, 1, 0, [m0]                                     ; C80C0100
	v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0]                               ; C80D0101
	; This wait sits between the scalar loads and image_sample, the first
	; instruction that uses s[12:19] and s[0:3].
	s_waitcnt lgkmcnt(0)                                                   ; BF8C007F
	; Takes v[2:3] as its vector source and writes v[0:3], overwriting the
	; v0/v1 inputs that the interp instructions consumed.
	image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002
	; This wait sits between image_sample and exp, which reads v0..v3.
	s_waitcnt vmcnt(0)                                                     ; BF8C0770
	exp 15, 0, 0, 1, 1, v0, v1, v2, v3                                     ; F800180F 03020100
	s_endpgm                                                               ; BF810000

*** SHADER STATS ***
SGPRS: 24
VGPRS: 4
Code Size: 60 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************