AMDGPU/SI: Fix bitcast between v2f32 and f64
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 15 09:19:18 PST 2015
On Fri, Dec 11, 2015 at 04:37:47PM +0900, Michel Dänzer wrote:
>
> Attached is a failure generated by the radeonsi driver with the piglit
> graphics test fs-abs-dvec2.shader_test and a patch fixing it (and
> similar failures in many other fp64 tests). I haven't been able to
> produce a simple test case for this.
>
r255657.
-Tom
>
> --
> Earthling Michel Dänzer | http://www.amd.com
> Libre software enthusiast | Mesa and X developer
> From 6213cb7c9c3bdc2d6306c278c2b6466a27dec71c Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer at amd.com>
> Date: Fri, 11 Dec 2015 16:10:30 +0900
> Subject: [PATCH] AMDGPU/SI: Fix bitcast between v2f32 and f64
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> The radeonsi fp64 support can hit these now that some redundant bitcasts
> are folded.
>
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
> lib/Target/AMDGPU/SIInstructions.td | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
> index dcc74f7..d45762b 100644
> --- a/lib/Target/AMDGPU/SIInstructions.td
> +++ b/lib/Target/AMDGPU/SIInstructions.td
> @@ -2561,7 +2561,9 @@ def : BitConvert <v2i32, i64, VReg_64>;
> def : BitConvert <i64, v2i32, VReg_64>;
> def : BitConvert <v2f32, i64, VReg_64>;
> def : BitConvert <i64, v2f32, VReg_64>;
> +def : BitConvert <v2f32, f64, VReg_64>;
> def : BitConvert <v2i32, f64, VReg_64>;
> +def : BitConvert <f64, v2f32, VReg_64>;
> def : BitConvert <f64, v2i32, VReg_64>;
> def : BitConvert <v4f32, v4i32, VReg_128>;
> def : BitConvert <v4i32, v4f32, VReg_128>;
> @@ -2570,7 +2572,9 @@ def : BitConvert <v4i32, v4f32, VReg_128>;
> def : BitConvert <v2i64, v4i32, SReg_128>;
> def : BitConvert <v4i32, v2i64, SReg_128>;
>
> +def : BitConvert <v2f64, v4f32, VReg_128>;
> def : BitConvert <v2f64, v4i32, VReg_128>;
> +def : BitConvert <v4f32, v2f64, VReg_128>;
> def : BitConvert <v4i32, v2f64, VReg_128>;
>
>
> --
> 2.6.2
>
> SHADER KEY
> instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
> as_es = 0
> as_ls = 0
> export_prim_id = 0
> VERT
> DCL IN[0]
> DCL OUT[0], POSITION
> 0: MOV OUT[0], IN[0]
> 1: END
> ; ModuleID = 'tgsi'
>
> define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
> main_body:
> %12 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
> %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0
> %14 = add i32 %5, %8
> %15 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %13, i32 0, i32 %14)
> %16 = extractelement <4 x float> %15, i32 0
> %17 = extractelement <4 x float> %15, i32 1
> %18 = extractelement <4 x float> %15, i32 2
> %19 = extractelement <4 x float> %15, i32 3
> call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %16, float %17, float %18, float %19)
> ret void
> }
>
> ; Function Attrs: nounwind readnone
> declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
>
> declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
>
> attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
> attributes #1 = { nounwind readnone }
>
> !0 = !{!"const", null, i32 1}
>
> Shader Disassembly:
>
> s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
> v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A
> s_waitcnt lgkmcnt(0) ; BF8C007F
> buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen ; E00C2000 80000000
> s_waitcnt vmcnt(0) ; BF8C0770
> exp 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100
> s_endpgm ; BF810000
>
> *** SHADER STATS ***
> SGPRS: 16
> VGPRS: 4
> Code Size: 36 bytes
> LDS: 0 blocks
> Scratch: 0 bytes per wave
> ********************
> SHADER KEY
> export_16bpc = 0x3
> last_cbuf = 0
> color_two_side = 0
> alpha_func = 7
> alpha_to_one = 0
> poly_stipple = 0
> clamp_color = 0
> FRAG
> PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
> DCL OUT[0], COLOR
> DCL CONST[0..2]
> DCL TEMP[0..2], LOCAL
> IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000}
> 0: DABS TEMP[0].xy, CONST[0].xyxy
> 1: DABS TEMP[0].zw, CONST[0].zwzw
> 2: DNEG TEMP[1].xy, CONST[2].xyxy
> 3: DNEG TEMP[1].zw, CONST[2].zwzw
> 4: DADD TEMP[2].xy, TEMP[0].xyxy, TEMP[1].xyxy
> 5: DADD TEMP[2].zw, TEMP[0].zwzw, TEMP[1].zwzw
> 6: DMUL TEMP[0].xy, TEMP[2].zwzw, TEMP[2].zwzw
> 7: DFMA TEMP[0].xy, TEMP[2].xyxy, TEMP[2].xyxy, TEMP[0].xyxy
> 8: DSQRT TEMP[0].xy, TEMP[0].xyxy
> 9: DSGE TEMP[0].x, CONST[1].xyxy, TEMP[0].xyxy
> 10: UIF TEMP[0].xxxx :0
> 11: MOV TEMP[0], IMM[0].xyxy
> 12: ELSE :0
> 13: MOV TEMP[0], IMM[0].yxxy
> 14: ENDIF
> 15: MOV OUT[0], TEMP[0]
> 16: END
> ; ModuleID = 'tgsi'
>
> define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
> main_body:
> %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
> %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0
> %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0)
> %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4)
> %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8)
> %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12)
> %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16)
> %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20)
> %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32)
> %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36)
> %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40)
> %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44)
> %35 = bitcast float %25 to i32
> %36 = insertelement <2 x i32> undef, i32 %35, i32 0
> %37 = bitcast float %26 to i32
> %38 = insertelement <2 x i32> %36, i32 %37, i32 1
> %39 = bitcast <2 x i32> %38 to double
> %40 = call double @llvm.fabs.f64(double %39)
> %41 = bitcast float %27 to i32
> %42 = insertelement <2 x i32> undef, i32 %41, i32 0
> %43 = bitcast float %28 to i32
> %44 = insertelement <2 x i32> %42, i32 %43, i32 1
> %45 = bitcast <2 x i32> %44 to double
> %46 = call double @llvm.fabs.f64(double %45)
> %47 = bitcast float %31 to i32
> %48 = insertelement <2 x i32> undef, i32 %47, i32 0
> %49 = bitcast float %32 to i32
> %50 = insertelement <2 x i32> %48, i32 %49, i32 1
> %51 = bitcast <2 x i32> %50 to double
> %52 = fsub double -0.000000e+00, %51
> %53 = bitcast float %33 to i32
> %54 = insertelement <2 x i32> undef, i32 %53, i32 0
> %55 = bitcast float %34 to i32
> %56 = insertelement <2 x i32> %54, i32 %55, i32 1
> %57 = bitcast <2 x i32> %56 to double
> %58 = fsub double -0.000000e+00, %57
> %bc20 = bitcast double %40 to <2 x i32>
> %bc = bitcast double %40 to <2 x i32>
> %59 = shufflevector <2 x i32> %bc20, <2 x i32> %bc, <2 x i32> <i32 0, i32 3>
> %60 = bitcast <2 x i32> %59 to double
> %bc21 = bitcast double %52 to <2 x i32>
> %bc22 = bitcast double %52 to <2 x i32>
> %61 = shufflevector <2 x i32> %bc21, <2 x i32> %bc22, <2 x i32> <i32 0, i32 3>
> %62 = bitcast <2 x i32> %61 to double
> %63 = fadd double %60, %62
> %bc23 = bitcast double %63 to <2 x float>
> %64 = extractelement <2 x float> %bc23, i32 0
> %bc24 = bitcast double %63 to <2 x float>
> %65 = extractelement <2 x float> %bc24, i32 1
> %bc25 = bitcast double %46 to <2 x i32>
> %bc26 = bitcast double %46 to <2 x i32>
> %66 = shufflevector <2 x i32> %bc25, <2 x i32> %bc26, <2 x i32> <i32 0, i32 3>
> %67 = bitcast <2 x i32> %66 to double
> %bc27 = bitcast double %58 to <2 x i32>
> %bc28 = bitcast double %58 to <2 x i32>
> %68 = shufflevector <2 x i32> %bc27, <2 x i32> %bc28, <2 x i32> <i32 0, i32 3>
> %69 = bitcast <2 x i32> %68 to double
> %70 = fadd double %67, %69
> %bc29 = bitcast double %70 to <2 x float>
> %71 = extractelement <2 x float> %bc29, i32 0
> %bc30 = bitcast double %70 to <2 x float>
> %72 = extractelement <2 x float> %bc30, i32 1
> %73 = bitcast float %71 to i32
> %74 = insertelement <2 x i32> undef, i32 %73, i32 0
> %75 = bitcast float %72 to i32
> %76 = insertelement <2 x i32> %74, i32 %75, i32 1
> %77 = bitcast <2 x i32> %76 to double
> %78 = bitcast float %71 to i32
> %79 = insertelement <2 x i32> undef, i32 %78, i32 0
> %80 = bitcast float %72 to i32
> %81 = insertelement <2 x i32> %79, i32 %80, i32 1
> %82 = bitcast <2 x i32> %81 to double
> %83 = fmul double %77, %82
> %84 = bitcast float %64 to i32
> %85 = insertelement <2 x i32> undef, i32 %84, i32 0
> %86 = bitcast float %65 to i32
> %87 = insertelement <2 x i32> %85, i32 %86, i32 1
> %88 = bitcast <2 x i32> %87 to double
> %89 = bitcast float %64 to i32
> %90 = insertelement <2 x i32> undef, i32 %89, i32 0
> %91 = bitcast float %65 to i32
> %92 = insertelement <2 x i32> %90, i32 %91, i32 1
> %93 = bitcast <2 x i32> %92 to double
> %bc33 = bitcast double %83 to <2 x i32>
> %bc34 = bitcast double %83 to <2 x i32>
> %94 = shufflevector <2 x i32> %bc33, <2 x i32> %bc34, <2 x i32> <i32 0, i32 3>
> %95 = bitcast <2 x i32> %94 to double
> %96 = call double @llvm.fma.f64(double %88, double %93, double %95)
> %bc37 = bitcast double %96 to <2 x i32>
> %bc38 = bitcast double %96 to <2 x i32>
> %97 = shufflevector <2 x i32> %bc37, <2 x i32> %bc38, <2 x i32> <i32 0, i32 3>
> %98 = bitcast <2 x i32> %97 to double
> %99 = call double @llvm.sqrt.f64(double %98)
> %100 = bitcast float %29 to i32
> %101 = insertelement <2 x i32> undef, i32 %100, i32 0
> %102 = bitcast float %30 to i32
> %103 = insertelement <2 x i32> %101, i32 %102, i32 1
> %104 = bitcast <2 x i32> %103 to double
> %bc41 = bitcast double %99 to <2 x i32>
> %bc42 = bitcast double %99 to <2 x i32>
> %105 = shufflevector <2 x i32> %bc41, <2 x i32> %bc42, <2 x i32> <i32 0, i32 3>
> %106 = bitcast <2 x i32> %105 to double
> %107 = fcmp oge double %104, %106
> %. = select i1 %107, float 1.000000e+00, float 0.000000e+00
> %.12 = select i1 %107, float 0.000000e+00, float 1.000000e+00
> %108 = call i32 @llvm.SI.packf16(float %.12, float %.)
> %109 = bitcast i32 %108 to float
> %110 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 1.000000e+00)
> %111 = bitcast i32 %110 to float
> call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %109, float %111, float %109, float %111)
> ret void
> }
>
> ; Function Attrs: nounwind readnone
> declare float @llvm.SI.load.const(<16 x i8>, i32) #1
>
> ; Function Attrs: nounwind readnone
> declare double @llvm.fabs.f64(double) #1
>
> ; Function Attrs: nounwind readnone
> declare double @llvm.fma.f64(double, double, double) #1
>
> ; Function Attrs: nounwind readnone
> declare double @llvm.sqrt.f64(double) #1
>
> ; Function Attrs: nounwind readnone
> declare i32 @llvm.SI.packf16(float, float) #1
>
> declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
>
> attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
> attributes #1 = { nounwind readnone }
>
> !0 = !{!"const", null, i32 1}
>
> Shader Disassembly:
>
> s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
> s_waitcnt lgkmcnt(0) ; BF8C007F
> s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
> s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
> s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
> s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
> s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
> s_buffer_load_dword s12, s[0:3], 0xa ; C206010A
> s_buffer_load_dword s13, s[0:3], 0xb ; C206810B
> s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103
> s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
> s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105
> s_waitcnt lgkmcnt(0) ; BF8C007F
> v_mov_b32_e32 v0, s10 ; 7E00020A
> v_mov_b32_e32 v1, s11 ; 7E02020B
> v_mov_b32_e32 v2, s12 ; 7E04020C
> v_mov_b32_e32 v3, s13 ; 7E06020D
> v_add_f64 v[2:3], |s[6:7]|, -v[2:3] ; D2C80102 40020406
> v_add_f64 v[0:1], |s[4:5]|, -v[0:1] ; D2C80100 40020004
> v_mul_f64 v[2:3], v[2:3], v[2:3] ; D2CA0002 00020502
> v_fma_f64 v[0:1], v[0:1], v[0:1], v[2:3] ; D2980000 040A0100
> v_sqrt_f64_e32 v[0:1], v[0:1] ; 7E006900
> v_cmp_ge_f64_e32 vcc, s[8:9], v[0:1] ; 7C4C0008
> v_cndmask_b32_e64 v0, 0, 1.0, vcc ; D2000000 01A9E480
> v_cndmask_b32_e64 v1, 1.0, 0, vcc ; D2000001 01A900F2
> v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101
> v_cvt_pkrtz_f16_f32_e64 v1, 0, 1.0 ; D25E0001 0001E480
> exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
> s_endpgm ; BF810000
>
> *** SHADER STATS ***
> SGPRS: 16
> VGPRS: 4
> Code Size: 148 bytes
> LDS: 0 blocks
> Scratch: 0 bytes per wave
> ********************
> SHADER KEY
> instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
> as_es = 0
> as_ls = 0
> export_prim_id = 0
> VERT
> DCL IN[0]
> DCL IN[1]
> DCL OUT[0], POSITION
> DCL OUT[1], GENERIC[0]
> 0: MOV OUT[0], IN[0]
> 1: MOV OUT[1], IN[1]
> 2: END
> ; ModuleID = 'tgsi'
>
> define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
> main_body:
> %12 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
> %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0
> %14 = add i32 %5, %8
> %15 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %13, i32 0, i32 %14)
> %16 = extractelement <4 x float> %15, i32 0
> %17 = extractelement <4 x float> %15, i32 1
> %18 = extractelement <4 x float> %15, i32 2
> %19 = extractelement <4 x float> %15, i32 3
> %20 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
> %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, align 16, !tbaa !0
> %22 = add i32 %5, %8
> %23 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %21, i32 0, i32 %22)
> %24 = extractelement <4 x float> %23, i32 0
> %25 = extractelement <4 x float> %23, i32 1
> %26 = extractelement <4 x float> %23, i32 2
> %27 = extractelement <4 x float> %23, i32 3
> call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %24, float %25, float %26, float %27)
> call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %16, float %17, float %18, float %19)
> ret void
> }
>
> ; Function Attrs: nounwind readnone
> declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
>
> declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
>
> attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
> attributes #1 = { nounwind readnone }
>
> !0 = !{!"const", null, i32 1}
>
> Shader Disassembly:
>
> s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
> s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
> v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A
> s_waitcnt lgkmcnt(0) ; BF8C007F
> buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100
> buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500
> s_waitcnt vmcnt(0) ; BF8C0770
> exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605
> exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201
> s_endpgm ; BF810000
>
> *** SHADER STATS ***
> SGPRS: 16
> VGPRS: 12
> Code Size: 56 bytes
> LDS: 0 blocks
> Scratch: 0 bytes per wave
> ********************
> SHADER KEY
> export_16bpc = 0x3
> last_cbuf = 0
> color_two_side = 0
> alpha_func = 7
> alpha_to_one = 0
> poly_stipple = 0
> clamp_color = 0
> FRAG
> DCL IN[0], GENERIC[0], CONSTANT
> DCL OUT[0], COLOR
> 0: MOV OUT[0], IN[0]
> 1: END
> ; ModuleID = 'tgsi'
>
> define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
> main_body:
> %23 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %6)
> %24 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %6)
> %25 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %6)
> %26 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %6)
> %27 = call i32 @llvm.SI.packf16(float %23, float %24)
> %28 = bitcast i32 %27 to float
> %29 = call i32 @llvm.SI.packf16(float %25, float %26)
> %30 = bitcast i32 %29 to float
> call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %28, float %30, float %28, float %30)
> ret void
> }
>
> ; Function Attrs: nounwind readnone
> declare float @llvm.SI.fs.constant(i32, i32, i32) #1
>
> ; Function Attrs: nounwind readnone
> declare i32 @llvm.SI.packf16(float, float) #1
>
> declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
>
> attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
> attributes #1 = { nounwind readnone }
>
> Shader Disassembly:
>
> s_mov_b32 m0, s10 ; BEFC030A
> v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002
> v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102
> v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
> v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202
> v_interp_mov_f32 v2, P0, 3, 0, [m0] ; C80A0302
> v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
> exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
> s_endpgm ; BF810000
>
> *** SHADER STATS ***
> SGPRS: 16
> VGPRS: 4
> Code Size: 40 bytes
> LDS: 0 blocks
> Scratch: 0 bytes per wave
> ********************
> SHADER KEY
> export_16bpc = 0x0
> last_cbuf = 0
> color_two_side = 0
> alpha_func = 7
> alpha_to_one = 0
> poly_stipple = 0
> clamp_color = 0
> FRAG
> DCL IN[0], GENERIC[0], LINEAR
> DCL OUT[0], COLOR
> DCL SAMP[0]
> DCL SVIEW[0], 2D, FLOAT
> 0: TEX OUT[0], IN[0], SAMP[0], 2D
> 1: END
> ; ModuleID = 'tgsi'
>
> define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
> main_body:
> %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
> %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0
> %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
> %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0
> %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12)
> %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12)
> %29 = bitcast float %27 to i32
> %30 = bitcast float %28 to i32
> %31 = insertelement <2 x i32> undef, i32 %29, i32 0
> %32 = insertelement <2 x i32> %31, i32 %30, i32 1
> %33 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %32, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
> %34 = extractelement <4 x float> %33, i32 0
> %35 = extractelement <4 x float> %33, i32 1
> %36 = extractelement <4 x float> %33, i32 2
> %37 = extractelement <4 x float> %33, i32 3
> call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %34, float %35, float %36, float %37)
> ret void
> }
>
> ; Function Attrs: nounwind readnone
> declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
>
> ; Function Attrs: nounwind readnone
> declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
>
> declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
>
> attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
> attributes #1 = { nounwind readnone }
>
> !0 = !{!"const", null, i32 1}
>
> Shader Disassembly:
>
> s_wqm_b64 exec, exec ; BEFE0A7E
> s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
> s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
> s_mov_b32 m0, s10 ; BEFC030A
> v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
> v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
> v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
> v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
> s_waitcnt lgkmcnt(0) ; BF8C007F
> image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002
> s_waitcnt vmcnt(0) ; BF8C0770
> exp 15, 0, 0, 1, 1, v0, v1, v2, v3 ; F800180F 03020100
> s_endpgm ; BF810000
>
> *** SHADER STATS ***
> SGPRS: 24
> VGPRS: 4
> Code Size: 60 bytes
> LDS: 0 blocks
> Scratch: 0 bytes per wave
> ********************
More information about the llvm-commits
mailing list