[llvm] r319894 - [InstSimplify] Fold insertelement into undef if index is out of bounds

Tue Dec 12 02:45:18 PST 2017

On 2017-12-12 11:08 AM, Igor Laevsky wrote:
> I reverted problematic changes in the https://reviews.llvm.org/rL320466

Thanks.

> Is it possible to obtain an llvm-ir of the failing test case before the
> optimizations? I can see the miscompile, but I can’t reproduce it using
> optimised good case.

The attached should be the LLVM IR before optimizations.

-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
-------------- next part --------------
; ModuleID = 'tgsi'
source_filename = "tgsi"
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"

; Function Attrs: alwaysinline
define private amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([12 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x float] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
  %TEMP1.w = alloca float
  %TEMP1.z = alloca float
  %TEMP1.y = alloca float
  %TEMP1.x = alloca float
  %TEMP0.w = alloca float
  %TEMP0.z = alloca float
  %TEMP0.y = alloca float
  %TEMP0.x = alloca float
  %22 = fdiv nsz float 1.000000e+00, %17, !fpmath !0
  %23 = insertelement <4 x float> undef, float %14, i32 0
  %24 = insertelement <4 x float> %23, float %15, i32 1
  %25 = insertelement <4 x float> %24, float %16, i32 2
  %26 = insertelement <4 x float> %25, float %22, i32 3
  %27 = extractelement <4 x float> %26, i32 0
  %28 = fptosi float %27 to i32
  %29 = extractelement <4 x float> %26, i32 1
  %30 = fptosi float %29 to i32
  %31 = bitcast i32 %28 to float
  store float %31, float* %TEMP0.x
  %32 = bitcast i32 %30 to float
  store float %32, float* %TEMP0.y
  %33 = load float, float* %TEMP0.x
  %34 = bitcast float %33 to i32
  %35 = ptrtoint [0 x float] addrspace(2)* %2 to i64
  %36 = bitcast i64 %35 to <2 x i32>
  %37 = extractelement <2 x i32> %36, i32 0
  %38 = extractelement <2 x i32> %36, i32 1
  %39 = insertelement <4 x i32> undef, i32 %37, i32 0
  %40 = insertelement <4 x i32> %39, i32 %38, i32 1
  %41 = insertelement <4 x i32> %40, i32 32, i32 2
  %42 = insertelement <4 x i32> %41, i32 163756, i32 3
  %43 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %42, i32 0)
  %44 = bitcast float %43 to i32
  %45 = add i32 %34, %44
  %46 = load float, float* %TEMP0.y
  %47 = bitcast float %46 to i32
  %48 = ptrtoint [0 x float] addrspace(2)* %2 to i64
  %49 = bitcast i64 %48 to <2 x i32>
  %50 = extractelement <2 x i32> %49, i32 0
  %51 = extractelement <2 x i32> %49, i32 1
  %52 = insertelement <4 x i32> undef, i32 %50, i32 0
  %53 = insertelement <4 x i32> %52, i32 %51, i32 1
  %54 = insertelement <4 x i32> %53, i32 32, i32 2
  %55 = insertelement <4 x i32> %54, i32 163756, i32 3
  %56 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %55, i32 4)
  %57 = bitcast float %56 to i32
  %58 = add i32 %47, %57
  %59 = bitcast i32 %45 to float
  store float %59, float* %TEMP0.x
  %60 = bitcast i32 %58 to float
  store float %60, float* %TEMP0.y
  %61 = ptrtoint [0 x float] addrspace(2)* %2 to i64
  %62 = bitcast i64 %61 to <2 x i32>
  %63 = extractelement <2 x i32> %62, i32 0
  %64 = extractelement <2 x i32> %62, i32 1
  %65 = insertelement <4 x i32> undef, i32 %63, i32 0
  %66 = insertelement <4 x i32> %65, i32 %64, i32 1
  %67 = insertelement <4 x i32> %66, i32 32, i32 2
  %68 = insertelement <4 x i32> %67, i32 163756, i32 3
  %69 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %68, i32 8)
  %70 = bitcast float %69 to i32
  %71 = load float, float* %TEMP0.y
  %72 = bitcast float %71 to i32
  %73 = load float, float* %TEMP0.x
  %74 = bitcast float %73 to i32
  %75 = mul i32 %70, %72
  %76 = add i32 %75, %74
  %77 = bitcast i32 %76 to float
  store float %77, float* %TEMP0.x
  store float 0.000000e+00, float* %TEMP0.w
  %78 = extractelement <4 x float> %26, i32 0
  %79 = fptosi float %78 to i32
  %80 = extractelement <4 x float> %26, i32 1
  %81 = fptosi float %80 to i32
  %82 = bitcast i32 %79 to float
  store float %82, float* %TEMP1.x
  %83 = bitcast i32 %81 to float
  store float %83, float* %TEMP1.y
  store float 0.000000e+00, float* %TEMP1.z
  store float 0.000000e+00, float* %TEMP1.w
  %84 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %3, i32 0, i32 16, !amdgpu.uniform !1
  %85 = load <8 x i32>, <8 x i32> addrspace(2)* %84, !invariant.load !1
  %86 = bitcast [80 x <8 x i32>] addrspace(2)* %3 to [0 x <4 x i32>] addrspace(2)*
  %87 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %86, i32 0, i32 35, !amdgpu.uniform !1
  %88 = load <4 x i32>, <4 x i32> addrspace(2)* %87, !invariant.load !1
  %89 = load float, float* %TEMP1.w
  %90 = bitcast float %89 to i32
  %91 = load float, float* %TEMP1.x
  %92 = bitcast float %91 to i32
  %93 = load float, float* %TEMP1.y
  %94 = bitcast float %93 to i32
  %95 = load float, float* %TEMP1.z
  %96 = bitcast float %95 to i32
  %97 = insertelement <4 x i32> undef, i32 %92, i32 0
  %98 = insertelement <4 x i32> %97, i32 %94, i32 1
  %99 = insertelement <4 x i32> %98, i32 %90, i32 2
  %100 = insertelement <4 x i32> %99, i32 undef, i32 3
  %101 = call nsz <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32> %100, <8 x i32> %85, i32 15, i1 false, i1 false, i1 false, i1 false) #1
  %102 = bitcast <4 x float> %101 to <4 x i32>
  %103 = extractelement <4 x i32> %102, i32 0
  %104 = extractelement <4 x i32> %102, i32 1
  %105 = extractelement <4 x i32> %102, i32 2
  %106 = extractelement <4 x i32> %102, i32 3
  %107 = bitcast i32 %103 to float
  store float %107, float* %TEMP1.x
  %108 = bitcast i32 %104 to float
  store float %108, float* %TEMP1.y
  %109 = bitcast i32 %105 to float
  store float %109, float* %TEMP1.z
  %110 = bitcast i32 %106 to float
  store float %110, float* %TEMP1.w
  %111 = load float, float* %TEMP1.x
  %112 = load float, float* %TEMP1.y
  %113 = load float, float* %TEMP1.z
  %114 = load float, float* %TEMP1.w
  %115 = insertelement <4 x float> undef, float %111, i32 0
  %116 = insertelement <4 x float> %115, float %112, i32 1
  %117 = insertelement <4 x float> %116, float %113, i32 2
  %118 = insertelement <4 x float> %117, float %114, i32 3
  %119 = bitcast [80 x <8 x i32>] addrspace(2)* %3 to [0 x <4 x i32>] addrspace(2)*
  %120 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %119, i32 0, i32 31, !amdgpu.uniform !1
  %121 = load <4 x i32>, <4 x i32> addrspace(2)* %120, !invariant.load !1
  %122 = extractelement <4 x i32> %121, i32 6
  %123 = and i32 %122, -2097153
  %124 = insertelement <4 x i32> %121, i32 %123, i32 6
  %125 = load float, float* %TEMP0.x
  %126 = bitcast float %125 to i32
  call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %118, <4 x i32> %124, i32 %126, i32 0, i1 false, i1 false) #7
  %127 = bitcast float %4 to i32
  %128 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %127, 8
  %129 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %128, float %20, 23
  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %129
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1

; Function Attrs: nounwind readonly
declare <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2

; Function Attrs: nounwind writeonly
declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #3

; Function Attrs: alwaysinline
define private amdgpu_ps void @ps_epilog(i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #4 {
main_body:
  call void @llvm.amdgcn.exp.f32(i32 9, i32 0, float undef, float undef, float undef, float undef, i1 true, i1 true) #5
  ret void
}

; Function Attrs: nounwind
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #5

define amdgpu_ps void @wrapper([12 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [0 x float] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #6 {
main_body:
  %22 = ptrtoint [12 x <4 x i32>] addrspace(2)* %0 to i64
  %23 = bitcast i64 %22 to <2 x i32>
  %24 = extractelement <2 x i32> %23, i32 0
  %25 = extractelement <2 x i32> %23, i32 1
  %26 = ptrtoint [0 x <8 x i32>] addrspace(2)* %1 to i64
  %27 = bitcast i64 %26 to <2 x i32>
  %28 = extractelement <2 x i32> %27, i32 0
  %29 = extractelement <2 x i32> %27, i32 1
  %30 = ptrtoint [0 x float] addrspace(2)* %2 to i64
  %31 = bitcast i64 %30 to <2 x i32>
  %32 = extractelement <2 x i32> %31, i32 0
  %33 = extractelement <2 x i32> %31, i32 1
  %34 = ptrtoint [80 x <8 x i32>] addrspace(2)* %3 to i64
  %35 = bitcast i64 %34 to <2 x i32>
  %36 = extractelement <2 x i32> %35, i32 0
  %37 = extractelement <2 x i32> %35, i32 1
  %38 = bitcast float %4 to i32
  %39 = bitcast <2 x i32> %6 to <2 x float>
  %40 = extractelement <2 x float> %39, i32 0
  %41 = extractelement <2 x float> %39, i32 1
  %42 = bitcast <2 x i32> %7 to <2 x float>
  %43 = extractelement <2 x float> %42, i32 0
  %44 = extractelement <2 x float> %42, i32 1
  %45 = bitcast <2 x i32> %8 to <2 x float>
  %46 = extractelement <2 x float> %45, i32 0
  %47 = extractelement <2 x float> %45, i32 1
  %48 = bitcast <3 x i32> %9 to <3 x float>
  %49 = extractelement <3 x float> %48, i32 0
  %50 = extractelement <3 x float> %48, i32 1
  %51 = extractelement <3 x float> %48, i32 2
  %52 = bitcast <2 x i32> %10 to <2 x float>
  %53 = extractelement <2 x float> %52, i32 0
  %54 = extractelement <2 x float> %52, i32 1
  %55 = bitcast <2 x i32> %11 to <2 x float>
  %56 = extractelement <2 x float> %55, i32 0
  %57 = extractelement <2 x float> %55, i32 1
  %58 = bitcast <2 x i32> %12 to <2 x float>
  %59 = extractelement <2 x float> %58, i32 0
  %60 = extractelement <2 x float> %58, i32 1
  %61 = bitcast i32 %18 to float
  %62 = bitcast i32 %19 to float
  %63 = bitcast i32 %21 to float
  %64 = insertelement <2 x i32> undef, i32 %24, i32 0
  %65 = insertelement <2 x i32> %64, i32 %25, i32 1
  %66 = bitcast <2 x i32> %65 to i64
  %67 = inttoptr i64 %66 to [12 x <4 x i32>] addrspace(2)*
  %68 = insertelement <2 x i32> undef, i32 %28, i32 0
  %69 = insertelement <2 x i32> %68, i32 %29, i32 1
  %70 = bitcast <2 x i32> %69 to i64
  %71 = inttoptr i64 %70 to [0 x <8 x i32>] addrspace(2)*
  %72 = insertelement <2 x i32> undef, i32 %32, i32 0
  %73 = insertelement <2 x i32> %72, i32 %33, i32 1
  %74 = bitcast <2 x i32> %73 to i64
  %75 = inttoptr i64 %74 to [0 x float] addrspace(2)*
  %76 = insertelement <2 x i32> undef, i32 %36, i32 0
  %77 = insertelement <2 x i32> %76, i32 %37, i32 1
  %78 = bitcast <2 x i32> %77 to i64
  %79 = inttoptr i64 %78 to [80 x <8 x i32>] addrspace(2)*
  %80 = bitcast i32 %38 to float
  %81 = insertelement <2 x float> undef, float %40, i32 0
  %82 = insertelement <2 x float> %81, float %41, i32 1
  %83 = bitcast <2 x float> %82 to <2 x i32>
  %84 = insertelement <2 x float> undef, float %43, i32 0
  %85 = insertelement <2 x float> %84, float %44, i32 1
  %86 = bitcast <2 x float> %85 to <2 x i32>
  %87 = insertelement <2 x float> undef, float %46, i32 0
  %88 = insertelement <2 x float> %87, float %47, i32 1
  %89 = bitcast <2 x float> %88 to <2 x i32>
  %90 = insertelement <3 x float> undef, float %49, i32 0
  %91 = insertelement <3 x float> %90, float %50, i32 1
  %92 = insertelement <3 x float> %91, float %51, i32 2
  %93 = bitcast <3 x float> %92 to <3 x i32>
  %94 = insertelement <2 x float> undef, float %53, i32 0
  %95 = insertelement <2 x float> %94, float %54, i32 1
  %96 = bitcast <2 x float> %95 to <2 x i32>
  %97 = insertelement <2 x float> undef, float %56, i32 0
  %98 = insertelement <2 x float> %97, float %57, i32 1
  %99 = bitcast <2 x float> %98 to <2 x i32>
  %100 = insertelement <2 x float> undef, float %59, i32 0
  %101 = insertelement <2 x float> %100, float %60, i32 1
  %102 = bitcast <2 x float> %101 to <2 x i32>
  %103 = bitcast float %61 to i32
  %104 = bitcast float %62 to i32
  %105 = bitcast float %63 to i32
  %106 = call <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([12 x <4 x i32>] addrspace(2)* %67, [0 x <8 x i32>] addrspace(2)* %71, [0 x float] addrspace(2)* %75, [80 x <8 x i32>] addrspace(2)* %79, float %80, i32 %5, <2 x i32> %83, <2 x i32> %86, <2 x i32> %89, <3 x i32> %93, <2 x i32> %96, <2 x i32> %99, <2 x i32> %102, float %13, float %14, float %15, float %16, float %17, i32 %103, i32 %104, float %20, i32 %105)
  %107 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 0
  %108 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 1
  %109 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 2
  %110 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 3
  %111 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 4
  %112 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 5
  %113 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 6
  %114 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 7
  %115 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 8
  %116 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 9
  %117 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 10
  %118 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 11
  %119 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 12
  %120 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 13
  %121 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 14
  %122 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 15
  %123 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 16
  %124 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 17
  %125 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 18
  %126 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 19
  %127 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 20
  %128 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 21
  %129 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 22
  %130 = extractvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %106, 23
  %131 = insertelement <2 x i32> undef, i32 %107, i32 0
  %132 = insertelement <2 x i32> %131, i32 %108, i32 1
  %133 = bitcast <2 x i32> %132 to i64
  %134 = insertelement <2 x i32> undef, i32 %109, i32 0
  %135 = insertelement <2 x i32> %134, i32 %110, i32 1
  %136 = bitcast <2 x i32> %135 to i64
  %137 = insertelement <2 x i32> undef, i32 %111, i32 0
  %138 = insertelement <2 x i32> %137, i32 %112, i32 1
  %139 = bitcast <2 x i32> %138 to i64
  %140 = insertelement <2 x i32> undef, i32 %113, i32 0
  %141 = insertelement <2 x i32> %140, i32 %114, i32 1
  %142 = bitcast <2 x i32> %141 to i64
  %143 = bitcast i32 %115 to float
  call void @ps_epilog(i64 %133, i64 %136, i64 %139, i64 %142, float %143, float %116, float %117, float %118, float %119, float %120, float %121, float %122, float %123, float %124, float %125, float %126, float %127, float %128, float %129, float %130)
  ret void
}

attributes #0 = { alwaysinline "no-signed-zeros-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind readonly }
attributes #3 = { nounwind writeonly }
attributes #4 = { alwaysinline "InitialPSInputAddr"="16777215" "no-signed-zeros-fp-math"="true" }
attributes #5 = { nounwind }
attributes #6 = { "no-signed-zeros-fp-math"="true" }
attributes #7 = { inaccessiblememonly nounwind }

!0 = !{float 2.500000e+00}
!1 = !{}
radeonsi: Compiling shader 6
SHADER KEY
  part.ps.prolog.color_two_side = 0
  part.ps.prolog.flatshade_colors = 0
  part.ps.prolog.poly_stipple = 0
  part.ps.prolog.force_persp_sample_interp = 0
  part.ps.prolog.force_linear_sample_interp = 0
  part.ps.prolog.force_persp_center_interp = 0
  part.ps.prolog.force_linear_center_interp = 0
  part.ps.prolog.bc_optimize_for_persp = 0
  part.ps.prolog.bc_optimize_for_linear = 0
  part.ps.epilog.spi_shader_col_format = 0x0
  part.ps.epilog.color_is_int8 = 0x0
  part.ps.epilog.color_is_int10 = 0x0
  part.ps.epilog.last_cbuf = 0
  part.ps.epilog.alpha_func = 7
  part.ps.epilog.alpha_to_one = 0
  part.ps.epilog.poly_line_smoothing = 0
  part.ps.epilog.clamp_color = 0

Pixel Shader:
Shader main disassembly:
wrapper:
BB0_0:
	s_load_dwordx8 s[8:15], s[6:7], 0x200                  ; C00E0203 00000200
	v_cvt_i32_f32_e32 v0, v2                               ; 7E001102
	v_cvt_i32_f32_e32 v1, v3                               ; 7E021103
	v_mov_b32_e32 v2, 0                                    ; 7E040280
	s_mov_b32 s7, 0x27fac                                  ; BE8700FF 00027FAC
	s_mov_b32 s6, 32                                       ; BE8600A0
	s_waitcnt lgkmcnt(0)                                   ; BF8C007F
	image_load_mip v[2:5], v[0:3], s[8:15] dmask:0xf unorm ; F0041F00 00020200
	s_buffer_load_dwordx2 s[0:1], s[4:7], 0x0              ; C0260002 00000000
	s_buffer_load_dword s2, s[4:7], 0x8                    ; C0220082 00000008
	s_waitcnt lgkmcnt(0)                                   ; BF8C007F
	v_add_u32_e32 v1, vcc, s1, v1                          ; 32020201
	v_mul_lo_i32 v1, v1, s2                                ; D2850001 00000501
	v_add_u32_e32 v0, vcc, s0, v0                          ; 32000000
	v_add_u32_e32 v0, vcc, v1, v0                          ; 32000101
	s_waitcnt vmcnt(0)                                     ; BF8C0F70
	buffer_store_format_xyzw v[2:5], v0, s[0:3], 0 idxen   ; E01C2000 80000200
	exp null off, off, off, off done vm                    ; C4001890 00000000
	s_endpgm                                               ; BF810000

*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0x0301
SPI_PS_INPUT_ENA  = 0x0301
*** SHADER STATS ***
SGPRS: 96
VGPRS: 8
Spilled SGPRs: 0
Spilled VGPRs: 0
Private memory VGPRs: 0
Code Size: 108 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 8
********************