[llvm] r319894 - [InstSimplify] Fold insertelement into undef if index is out of bounds
Michel Dänzer via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 12 02:45:18 PST 2017
On 2017-12-12 11:08 AM, Igor Laevsky wrote:
> I reverted problematic changes in the https://reviews.llvm.org/rL320466
Thanks.
> Is it possible to obtain an llvm-ir of the failing test case before the
> optimizations? I can see the miscompile, but I can’t reproduce it using
> optimised good case.
The attached should be the LLVM IR before optimizations.
--
Earthling Michel Dänzer | http://www.amd.com
Libre software enthusiast | Mesa and X developer
-------------- next part --------------
; ModuleID = 'tgsi'
source_filename = "tgsi"
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"
; @main - main part of the pixel shader (amdgpu_ps calling convention,
; attribute group #0). All arguments are unnamed and are therefore referred
; to by position:
;   %0..%3   inreg pointers into addrspace(2)
;   %4, %5   inreg float / i32
;   %6..%12  <2 x i32> / <3 x i32> vectors (not read in this body)
;   %13..%17 floats, %18/%19 i32, %20 float, %21 i32
; Returns a packed struct of 9 x i32 followed by 15 x float. Only field 8
; (from %4) and field 23 (from %20) are written; every other field is undef.
; Side effect: one llvm.amdgcn.buffer.store.format.v4f32 call.
; Function Attrs: alwaysinline
define private amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([12 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x float] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
; Stack slots for two 4-component temporaries, TEMP0 and TEMP1.
%TEMP1.w = alloca float
%TEMP1.z = alloca float
%TEMP1.y = alloca float
%TEMP1.x = alloca float
%TEMP0.w = alloca float
%TEMP0.z = alloca float
%TEMP0.y = alloca float
%TEMP0.x = alloca float
; %26 = <%14, %15, %16, 1.0 / %17>
%22 = fdiv nsz float 1.000000e+00, %17, !fpmath !0
%23 = insertelement <4 x float> undef, float %14, i32 0
%24 = insertelement <4 x float> %23, float %15, i32 1
%25 = insertelement <4 x float> %24, float %16, i32 2
%26 = insertelement <4 x float> %25, float %22, i32 3
; TEMP0.xy = lanes 0 and 1 of %26 converted to signed integers. The integer
; bit patterns are kept in the float-typed slots via bitcast.
%27 = extractelement <4 x float> %26, i32 0
%28 = fptosi float %27 to i32
%29 = extractelement <4 x float> %26, i32 1
%30 = fptosi float %29 to i32
%31 = bitcast i32 %28 to float
store float %31, float* %TEMP0.x
%32 = bitcast i32 %30 to float
store float %32, float* %TEMP0.y
; TEMP0.x += constant loaded at offset 0. The <4 x i32> operand is built from
; the two 32-bit halves of pointer %2 plus the literals 32 and 163756 (0x27FAC).
%33 = load float, float* %TEMP0.x
%34 = bitcast float %33 to i32
%35 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%36 = bitcast i64 %35 to <2 x i32>
%37 = extractelement <2 x i32> %36, i32 0
%38 = extractelement <2 x i32> %36, i32 1
%39 = insertelement <4 x i32> undef, i32 %37, i32 0
%40 = insertelement <4 x i32> %39, i32 %38, i32 1
%41 = insertelement <4 x i32> %40, i32 32, i32 2
%42 = insertelement <4 x i32> %41, i32 163756, i32 3
%43 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %42, i32 0)
%44 = bitcast float %43 to i32
%45 = add i32 %34, %44
; TEMP0.y += constant loaded at offset 4 (same <4 x i32> operand, rebuilt).
%46 = load float, float* %TEMP0.y
%47 = bitcast float %46 to i32
%48 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%49 = bitcast i64 %48 to <2 x i32>
%50 = extractelement <2 x i32> %49, i32 0
%51 = extractelement <2 x i32> %49, i32 1
%52 = insertelement <4 x i32> undef, i32 %50, i32 0
%53 = insertelement <4 x i32> %52, i32 %51, i32 1
%54 = insertelement <4 x i32> %53, i32 32, i32 2
%55 = insertelement <4 x i32> %54, i32 163756, i32 3
%56 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %55, i32 4)
%57 = bitcast float %56 to i32
%58 = add i32 %47, %57
%59 = bitcast i32 %45 to float
store float %59, float* %TEMP0.x
%60 = bitcast i32 %58 to float
store float %60, float* %TEMP0.y
; TEMP0.x = (constant loaded at offset 8) * TEMP0.y + TEMP0.x, in integer
; arithmetic. This value is later used as the i32 operand of the buffer store.
%61 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%62 = bitcast i64 %61 to <2 x i32>
%63 = extractelement <2 x i32> %62, i32 0
%64 = extractelement <2 x i32> %62, i32 1
%65 = insertelement <4 x i32> undef, i32 %63, i32 0
%66 = insertelement <4 x i32> %65, i32 %64, i32 1
%67 = insertelement <4 x i32> %66, i32 32, i32 2
%68 = insertelement <4 x i32> %67, i32 163756, i32 3
%69 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %68, i32 8)
%70 = bitcast float %69 to i32
%71 = load float, float* %TEMP0.y
%72 = bitcast float %71 to i32
%73 = load float, float* %TEMP0.x
%74 = bitcast float %73 to i32
%75 = mul i32 %70, %72
%76 = add i32 %75, %74
%77 = bitcast i32 %76 to float
store float %77, float* %TEMP0.x
store float 0.000000e+00, float* %TEMP0.w
; TEMP1.xy = lanes 0 and 1 of %26 converted to signed integers; TEMP1.zw = 0.0.
%78 = extractelement <4 x float> %26, i32 0
%79 = fptosi float %78 to i32
%80 = extractelement <4 x float> %26, i32 1
%81 = fptosi float %80 to i32
%82 = bitcast i32 %79 to float
store float %82, float* %TEMP1.x
%83 = bitcast i32 %81 to float
store float %83, float* %TEMP1.y
store float 0.000000e+00, float* %TEMP1.z
store float 0.000000e+00, float* %TEMP1.w
; %85 = <8 x i32> element 16 of the array behind %3.
%84 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %3, i32 0, i32 16, !amdgpu.uniform !1
%85 = load <8 x i32>, <8 x i32> addrspace(2)* %84, !invariant.load !1
; %88 = <4 x i32> element 35 of the same memory viewed as an array of
; <4 x i32>. %88 has no users in this function.
%86 = bitcast [80 x <8 x i32>] addrspace(2)* %3 to [0 x <4 x i32>] addrspace(2)*
%87 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %86, i32 0, i32 35, !amdgpu.uniform !1
%88 = load <4 x i32>, <4 x i32> addrspace(2)* %87, !invariant.load !1
; Build the <4 x i32> operand of the image load:
;   <TEMP1.x, TEMP1.y, TEMP1.w, undef>
; Lane 2 takes the TEMP1.w value (%90); the TEMP1.z value (%96) has no users.
; Both slots were set to 0.0 just above.
%89 = load float, float* %TEMP1.w
%90 = bitcast float %89 to i32
%91 = load float, float* %TEMP1.x
%92 = bitcast float %91 to i32
%93 = load float, float* %TEMP1.y
%94 = bitcast float %93 to i32
%95 = load float, float* %TEMP1.z
%96 = bitcast float %95 to i32
%97 = insertelement <4 x i32> undef, i32 %92, i32 0
%98 = insertelement <4 x i32> %97, i32 %94, i32 1
%99 = insertelement <4 x i32> %98, i32 %90, i32 2
%100 = insertelement <4 x i32> %99, i32 undef, i32 3
%101 = call nsz <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32> %100, <8 x i32> %85, i32 15, i1 false, i1 false, i1 false, i1 false) #1
; Scatter the four loaded components into TEMP1.xyzw.
%102 = bitcast <4 x float> %101 to <4 x i32>
%103 = extractelement <4 x i32> %102, i32 0
%104 = extractelement <4 x i32> %102, i32 1
%105 = extractelement <4 x i32> %102, i32 2
%106 = extractelement <4 x i32> %102, i32 3
%107 = bitcast i32 %103 to float
store float %107, float* %TEMP1.x
%108 = bitcast i32 %104 to float
store float %108, float* %TEMP1.y
%109 = bitcast i32 %105 to float
store float %109, float* %TEMP1.z
%110 = bitcast i32 %106 to float
store float %110, float* %TEMP1.w
; %118 = <TEMP1.x, TEMP1.y, TEMP1.z, TEMP1.w>, the data operand of the store.
%111 = load float, float* %TEMP1.x
%112 = load float, float* %TEMP1.y
%113 = load float, float* %TEMP1.z
%114 = load float, float* %TEMP1.w
%115 = insertelement <4 x float> undef, float %111, i32 0
%116 = insertelement <4 x float> %115, float %112, i32 1
%117 = insertelement <4 x float> %116, float %113, i32 2
%118 = insertelement <4 x float> %117, float %114, i32 3
; %121 = <4 x i32> element 31 of the memory behind %3 viewed as an array of
; <4 x i32>. It becomes the <4 x i32> operand of the buffer store below.
%119 = bitcast [80 x <8 x i32>] addrspace(2)* %3 to [0 x <4 x i32>] addrspace(2)*
%120 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %119, i32 0, i32 31, !amdgpu.uniform !1
%121 = load <4 x i32>, <4 x i32> addrspace(2)* %120, !invariant.load !1
; NOTE(review): %121 has only four lanes (valid indices 0..3), yet lane 6 is
; extracted, masked with -2097153 (0xFFDFFFFF, i.e. bit 21 cleared) and
; inserted back at index 6. Per the LLVM Language Reference an
; extractelement/insertelement whose index exceeds the vector length does not
; produce a defined value. This looks like the out-of-bounds pattern named in
; the thread subject (r319894) - confirm before relying on the stored data.
%122 = extractelement <4 x i32> %121, i32 6
%123 = and i32 %122, -2097153
%124 = insertelement <4 x i32> %121, i32 %123, i32 6
%125 = load float, float* %TEMP0.x
%126 = bitcast float %125 to i32
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %118, <4 x i32> %124, i32 %126, i32 0, i1 false, i1 false) #7
; Return value: field 8 = bits of %4, field 23 = %20, everything else undef.
%127 = bitcast float %4 to i32
%128 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %127, 8
%129 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %128, float %20, 23
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %129
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
; Function Attrs: nounwind readonly
declare <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
; Function Attrs: nounwind writeonly
declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #3
; @ps_epilog - pixel-shader epilog part (amdgpu_ps calling convention,
; attribute group #4). Takes four inreg i64, one inreg float and fifteen
; floats; none of the 20 parameters is read. The body issues a single
; llvm.amdgcn.exp.f32 call whose operands are the constants 9 and 0, four
; undef floats and two `true` flags, then returns.
; Function Attrs: alwaysinline
define private amdgpu_ps void @ps_epilog(i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #4 {
main_body:
call void @llvm.amdgcn.exp.f32(i32 9, i32 0, float undef, float undef, float undef, float undef, i1 true, i1 true) #5
ret void
}
; Function Attrs: nounwind
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #5
; @wrapper - the only non-private function in this module (amdgpu_ps,
; attribute group #6). Its parameter list matches @main's, except that the
; four leading pointers are `byval` instead of `inreg`.
; Flow: split the pointer and vector arguments into scalars, reassemble them,
; call @main, unpack all 24 fields of the returned struct and forward them to
; @ps_epilog. Arguments %5, %13..%17 and %20 are passed to @main unchanged.
define amdgpu_ps void @wrapper([12 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [0 x float] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #6 {
main_body:
; Split each 64-bit pointer argument (%0..%3) into its two 32-bit halves.
%22 = ptrtoint [12 x <4 x i32>] addrspace(2)* %0 to i64
%23 = bitcast i64 %22 to <2 x i32>
%24 = extractelement <2 x i32> %23, i32 0
%25 = extractelement <2 x i32> %23, i32 1
%26 = ptrtoint [0 x <8 x i32>] addrspace(2)* %1 to i64
%27 = bitcast i64 %26 to <2 x i32>
%28 = extractelement <2 x i32> %27, i32 0
%29 = extractelement <2 x i32> %27, i32 1
%30 = ptrtoint [0 x float] addrspace(2)* %2 to i64
%31 = bitcast i64 %30 to <2 x i32>
%32 = extractelement <2 x i32> %31, i32 0
%33 = extractelement <2 x i32> %31, i32 1
%34 = ptrtoint [80 x <8 x i32>] addrspace(2)* %3 to i64
%35 = bitcast i64 %34 to <2 x i32>
%36 = extractelement <2 x i32> %35, i32 0
%37 = extractelement <2 x i32> %35, i32 1
; Reinterpret the remaining arguments as individual scalars: %4 as i32, the
; integer vectors %6..%12 as float lanes, and the i32 arguments %18, %19 and
; %21 as floats.
%38 = bitcast float %4 to i32
%39 = bitcast <2 x i32> %6 to <2 x float>
%40 = extractelement <2 x float> %39, i32 0
%41 = extractelement <2 x float> %39, i32 1
%42 = bitcast <2 x i32> %7 to <2 x float>
%43 = extractelement <2 x float> %42, i32 0
%44 = extractelement <2 x float> %42, i32 1
%45 = bitcast <2 x i32> %8 to <2 x float>
%46 = extractelement <2 x float> %45, i32 0
%47 = extractelement <2 x float> %45, i32 1
%48 = bitcast <3 x i32> %9 to <3 x float>
%49 = extractelement <3 x float> %48, i32 0
%50 = extractelement <3 x float> %48, i32 1
%51 = extractelement <3 x float> %48, i32 2
%52 = bitcast <2 x i32> %10 to <2 x float>
%53 = extractelement <2 x float> %52, i32 0
%54 = extractelement <2 x float> %52, i32 1
%55 = bitcast <2 x i32> %11 to <2 x float>
%56 = extractelement <2 x float> %55, i32 0
%57 = extractelement <2 x float> %55, i32 1
%58 = bitcast <2 x i32> %12 to <2 x float>
%59 = extractelement <2 x float> %58, i32 0
%60 = extractelement <2 x float> %58, i32 1
%61 = bitcast i32 %18 to float
%62 = bitcast i32 %19 to float
%63 = bitcast i32 %21 to float
; Reassemble the four pointers from their 32-bit halves.
%64 = insertelement <2 x i32> undef, i32 %24, i32 0
%65 = insertelement <2 x i32> %64, i32 %25, i32 1
%66 = bitcast <2 x i32> %65 to i64
%67 = inttoptr i64 %66 to [12 x <4 x i32>] addrspace(2)*
%68 = insertelement <2 x i32> undef, i32 %28, i32 0
%69 = insertelement <2 x i32> %68, i32 %29, i32 1
%70 = bitcast <2 x i32> %69 to i64
%71 = inttoptr i64 %70 to [0 x <8 x i32>] addrspace(2)*
%72 = insertelement <2 x i32> undef, i32 %32, i32 0
%73 = insertelement <2 x i32> %72, i32 %33, i32 1
%74 = bitcast <2 x i32> %73 to i64
%75 = inttoptr i64 %74 to [0 x float] addrspace(2)*
%76 = insertelement <2 x i32> undef, i32 %36, i32 0
%77 = insertelement <2 x i32> %76, i32 %37, i32 1
%78 = bitcast <2 x i32> %77 to i64
%79 = inttoptr i64 %78 to [80 x <8 x i32>] addrspace(2)*
; Reassemble the scalar and vector arguments in the types @main expects.
%80 = bitcast i32 %38 to float
%81 = insertelement <2 x float> undef, float %40, i32 0
%82 = insertelement <2 x float> %81, float %41, i32 1
%83 = bitcast <2 x float> %82 to <2 x i32>
%84 = insertelement <2 x float> undef, float %43, i32 0
%85 = insertelement <2 x float> %84, float %44, i32 1
%86 = bitcast <2 x float> %85 to <2 x i32>
%87 = insertelement <2 x float> undef, float %46, i32 0
%88 = insertelement <2 x float> %87, float %47, i32 1
%89 = bitcast <2 x float> %88 to <2 x i32>
%90 = insertelement <3 x float> undef, float %49, i32 0
%91 = insertelement <3 x float> %90, float %50, i32 1
%92 = insertelement <3 x float> %91, float %51, i32 2
%93 = bitcast <3 x float> %92 to <3 x i32>
%94 = insertelement <2 x float> undef, float %53, i32 0
%95 = insertelement <2 x float> %94, float %54, i32 1
%96 = bitcast <2 x float> %95 to <2 x i32>
%97 = insertelement <2 x float> undef, float %56, i32 0
%98 = insertelement <2 x float> %97, float %57, i32 1
%99 = bitcast <2 x float> %98 to <2 x i32>
%100 = insertelement <2 x float> undef, float %59, i32 0
%101 = insertelement <2 x float> %100, float %60, i32 1
%102 = bitcast <2 x float> %101 to <2 x i32>
%103 = bitcast float %61 to i32
%104 = bitcast float %62 to i32
%105 = bitcast float %63 to i32
; Run the main shader part on the reassembled arguments.
%106 = call <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([12 x <4 x i32>] addrspace(2)* %67, [0 x <8 x i32>] addrspace(2)* %71, [0 x float] addrspace(2)* %75, [80 x <8 x i32>] addrspace(2)* %79, float %80, i32 %5, <2 x i32> %83, <2 x i32> %86, <2 x i32> %89, <3 x i32> %93, <2 x i32> %96, <2 x i32> %99, <2 x i32> %102, float %13, float %14, float %15, float %16, float %17, i32 %103, i32 %104, float %20, i32 %105)
; Unpack every field (0..23) of @main's return struct.
%107 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 0
%108 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 1
%109 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 2
%110 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 3
%111 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 4
%112 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 5
%113 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 6
%114 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 7
%115 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 8
%116 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 9
%117 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 10
%118 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 11
%119 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 12
%120 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 13
%121 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 14
%122 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 15
%123 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 16
%124 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 17
%125 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 18
%126 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 19
%127 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 20
%128 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 21
%129 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 22
%130 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 23
; Pack fields 0..7 pairwise into four i64 values and reinterpret field 8 as a
; float, matching @ps_epilog's first five parameters.
%131 = insertelement <2 x i32> undef, i32 %107, i32 0
%132 = insertelement <2 x i32> %131, i32 %108, i32 1
%133 = bitcast <2 x i32> %132 to i64
%134 = insertelement <2 x i32> undef, i32 %109, i32 0
%135 = insertelement <2 x i32> %134, i32 %110, i32 1
%136 = bitcast <2 x i32> %135 to i64
%137 = insertelement <2 x i32> undef, i32 %111, i32 0
%138 = insertelement <2 x i32> %137, i32 %112, i32 1
%139 = bitcast <2 x i32> %138 to i64
%140 = insertelement <2 x i32> undef, i32 %113, i32 0
%141 = insertelement <2 x i32> %140, i32 %114, i32 1
%142 = bitcast <2 x i32> %141 to i64
%143 = bitcast i32 %115 to float
; Fields 9..23 (floats) are forwarded to the epilog unchanged.
call void @ps_epilog(i64 %133, i64 %136, i64 %139, i64 %142, float %143, float %116, float %117, float %118, float %119, float %120, float %121, float %122, float %123, float %124, float %125, float %126, float %127, float %128, float %129, float %130)
ret void
}
attributes #0 = { alwaysinline "no-signed-zeros-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind readonly }
attributes #3 = { nounwind writeonly }
attributes #4 = { alwaysinline "InitialPSInputAddr"="16777215" "no-signed-zeros-fp-math"="true" }
attributes #5 = { nounwind }
attributes #6 = { "no-signed-zeros-fp-math"="true" }
attributes #7 = { inaccessiblememonly nounwind }
!0 = !{float 2.500000e+00}
!1 = !{}
radeonsi: Compiling shader 6
SHADER KEY
part.ps.prolog.color_two_side = 0
part.ps.prolog.flatshade_colors = 0
part.ps.prolog.poly_stipple = 0
part.ps.prolog.force_persp_sample_interp = 0
part.ps.prolog.force_linear_sample_interp = 0
part.ps.prolog.force_persp_center_interp = 0
part.ps.prolog.force_linear_center_interp = 0
part.ps.prolog.bc_optimize_for_persp = 0
part.ps.prolog.bc_optimize_for_linear = 0
part.ps.epilog.spi_shader_col_format = 0x0
part.ps.epilog.color_is_int8 = 0x0
part.ps.epilog.color_is_int10 = 0x0
part.ps.epilog.last_cbuf = 0
part.ps.epilog.alpha_func = 7
part.ps.epilog.alpha_to_one = 0
part.ps.epilog.poly_line_smoothing = 0
part.ps.epilog.clamp_color = 0
Pixel Shader:
Shader main disassembly:
wrapper:
BB0_0:
s_load_dwordx8 s[8:15], s[6:7], 0x200 ; C00E0203 00000200
v_cvt_i32_f32_e32 v0, v2 ; 7E001102
v_cvt_i32_f32_e32 v1, v3 ; 7E021103
v_mov_b32_e32 v2, 0 ; 7E040280
s_mov_b32 s7, 0x27fac ; BE8700FF 00027FAC
s_mov_b32 s6, 32 ; BE8600A0
s_waitcnt lgkmcnt(0) ; BF8C007F
image_load_mip v[2:5], v[0:3], s[8:15] dmask:0xf unorm ; F0041F00 00020200
s_buffer_load_dwordx2 s[0:1], s[4:7], 0x0 ; C0260002 00000000
s_buffer_load_dword s2, s[4:7], 0x8 ; C0220082 00000008
s_waitcnt lgkmcnt(0) ; BF8C007F
v_add_u32_e32 v1, vcc, s1, v1 ; 32020201
v_mul_lo_i32 v1, v1, s2 ; D2850001 00000501
v_add_u32_e32 v0, vcc, s0, v0 ; 32000000
v_add_u32_e32 v0, vcc, v1, v0 ; 32000101
s_waitcnt vmcnt(0) ; BF8C0F70
buffer_store_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E01C2000 80000200
exp null off, off, off, off done vm ; C4001890 00000000
s_endpgm ; BF810000
*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0x0301
SPI_PS_INPUT_ENA = 0x0301
*** SHADER STATS ***
SGPRS: 96
VGPRS: 8
Spilled SGPRs: 0
Spilled VGPRs: 0
Private memory VGPRs: 0
Code Size: 108 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 8
********************
More information about the llvm-commits
mailing list