[llvm] r308325 - AMDGPU: Figure out private memory regs after lowering
Michel Dänzer via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 24 02:58:32 PDT 2017
Hi Matt,
On 19/07/17 01:44 AM, Matt Arsenault via llvm-commits wrote:
> Author: arsenm
> Date: Tue Jul 18 09:44:56 2017
> New Revision: 308325
>
> URL: http://llvm.org/viewvc/llvm-project?rev=308325&view=rev
> Log:
> AMDGPU: Figure out private memory regs after lowering
>
> Introduce pseudo-registers for registers needed for stack
> access, which are replaced during finalizeLowering.
> Note these pseudo-registers are currently only used for the
> used register location, and not for determining their
> input argument register.
>
> This is better because it avoids the need to try to predict
> whether a call will be emitted from the IR, and also
> detects stack objects introduced by legalization.
>
> Test changes are from the HasStackObjects check being more
> accurate since stack objects introduced during legalization
> are now known.
This broke the piglit test
spec@glsl-1.50@execution@geometry@max-input-components and a bunch of
tests under spec@arb_gpu_shader_int64@execution@built-in-functions, e.g.
spec@arb_gpu_shader_int64@execution@built-in-functions@fs-op-div-int64_t-i64vec4
for me on Kaveri.
Attaching the LLVM IR of the shader from the latter which gets miscompiled.
--
Earthling Michel Dänzer | http://www.amd.com
Libre software enthusiast | Mesa and X developer
-------------- next part --------------
; Module header and intrinsic declarations for the attached shader reproducer.
; ModuleID = 'tgsi'
source_filename = "tgsi"
; Data layout and target triple select the amdgcn target; the leading "e" in
; the layout string marks the module as little-endian.
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"
; Takes a <4 x i32> operand plus an i32 and returns a float. @wrapper calls it
; with a loaded <4 x i32> and constant i32 values that look like byte offsets
; into a constant buffer (NOTE(review): confirm offset units against the
; intrinsic definition).
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #0
; Converts two floats into a <2 x half> pair.
; Function Attrs: nounwind readnone speculatable
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
; Export intrinsic taking two i32 operands, two <2 x i16> operands and two i1
; flags; @wrapper uses it once to emit the shader result.
; Function Attrs: nounwind
declare void @llvm.amdgcn.exp.compr.v2i16(i32, i32, <2 x i16>, <2 x i16>, i1, i1) #2
; Pixel-shader entry point (amdgpu_ps calling convention) that checks four
; signed 64-bit divisions against expected results read from the same buffer
; and exports one packed colour value indicating pass/fail.
;
; The 21 unnamed parameters are %0..%20; only %1 (the [32 x <4 x i32>] table)
; is referenced in the body. Unnamed values are numbered implicitly, so the
; instruction order below must not change.
;
; Layout of the values read via llvm.SI.load.const (offsets as passed to the
; intrinsic; presumably bytes -- TODO confirm):
;    0,  4        -> dividend %29
;   16..44        -> four divisors %36, %44, %52, %60
;   48..76        -> four expected quotients %68, %76, %84, %92
; Offsets 8 and 12 are never read.
define amdgpu_ps void @wrapper([12 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #3 {
main_body:
; Fetch element 16 of the <4 x i32> table passed as %1; the loaded vector is
; the first operand of every llvm.SI.load.const call below.
%21 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %1, i64 0, i64 16, !amdgpu.uniform !0
%22 = load <4 x i32>, <4 x i32> addrspace(2)* %21, align 16, !invariant.load !0, !alias.scope !1
; Dividend: two 32-bit loads (offsets 0 and 4) are reinterpreted as i32, placed
; in lanes 0 and 1 of a <2 x i32>, and bitcast to a single i64. Every 64-bit
; value below is assembled with the same pattern.
%23 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 0)
%24 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 4)
%25 = bitcast float %23 to i32
%26 = insertelement <2 x i32> undef, i32 %25, i32 0
%27 = bitcast float %24 to i32
%28 = insertelement <2 x i32> %26, i32 %27, i32 1
%29 = bitcast <2 x i32> %28 to i64
; Divisor 0 (offsets 16, 20) and quotient 0 = dividend sdiv divisor 0.
%30 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 16)
%31 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 20)
%32 = bitcast float %30 to i32
%33 = insertelement <2 x i32> undef, i32 %32, i32 0
%34 = bitcast float %31 to i32
%35 = insertelement <2 x i32> %33, i32 %34, i32 1
%36 = bitcast <2 x i32> %35 to i64
%37 = sdiv i64 %29, %36
; Divisor 1 (offsets 24, 28) and quotient 1.
%38 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 24)
%39 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 28)
%40 = bitcast float %38 to i32
%41 = insertelement <2 x i32> undef, i32 %40, i32 0
%42 = bitcast float %39 to i32
%43 = insertelement <2 x i32> %41, i32 %42, i32 1
%44 = bitcast <2 x i32> %43 to i64
%45 = sdiv i64 %29, %44
; Divisor 2 (offsets 32, 36) and quotient 2.
%46 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 32)
%47 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 36)
%48 = bitcast float %46 to i32
%49 = insertelement <2 x i32> undef, i32 %48, i32 0
%50 = bitcast float %47 to i32
%51 = insertelement <2 x i32> %49, i32 %50, i32 1
%52 = bitcast <2 x i32> %51 to i64
%53 = sdiv i64 %29, %52
; Divisor 3 (offsets 40, 44) and quotient 3.
%54 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 40)
%55 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 44)
%56 = bitcast float %54 to i32
%57 = insertelement <2 x i32> undef, i32 %56, i32 0
%58 = bitcast float %55 to i32
%59 = insertelement <2 x i32> %57, i32 %58, i32 1
%60 = bitcast <2 x i32> %59 to i64
%61 = sdiv i64 %29, %60
; Expected value 0 (offsets 48, 52), compared for equality with quotient 0.
%62 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 48)
%63 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 52)
%64 = bitcast float %62 to i32
%65 = insertelement <2 x i32> undef, i32 %64, i32 0
%66 = bitcast float %63 to i32
%67 = insertelement <2 x i32> %65, i32 %66, i32 1
%68 = bitcast <2 x i32> %67 to i64
%69 = icmp eq i64 %37, %68
; Expected value 1 (offsets 56, 60), compared with quotient 1.
%70 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 56)
%71 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 60)
%72 = bitcast float %70 to i32
%73 = insertelement <2 x i32> undef, i32 %72, i32 0
%74 = bitcast float %71 to i32
%75 = insertelement <2 x i32> %73, i32 %74, i32 1
%76 = bitcast <2 x i32> %75 to i64
%77 = icmp eq i64 %45, %76
; Expected value 2 (offsets 64, 68), compared with quotient 2.
%78 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 64)
%79 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 68)
%80 = bitcast float %78 to i32
%81 = insertelement <2 x i32> undef, i32 %80, i32 0
%82 = bitcast float %79 to i32
%83 = insertelement <2 x i32> %81, i32 %82, i32 1
%84 = bitcast <2 x i32> %83 to i64
%85 = icmp eq i64 %53, %84
; Expected value 3 (offsets 72, 76), compared with quotient 3.
%86 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 72)
%87 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 76)
%88 = bitcast float %86 to i32
%89 = insertelement <2 x i32> undef, i32 %88, i32 0
%90 = bitcast float %87 to i32
%91 = insertelement <2 x i32> %89, i32 %90, i32 1
%92 = bitcast <2 x i32> %91 to i64
%93 = icmp eq i64 %61, %92
; %96 is true only when all four quotients match their expected values.
%94 = and i1 %69, %77
%95 = and i1 %85, %93
%96 = and i1 %94, %95
; On success %97 = 0.0 and %98 = 1.0; on any mismatch the two are swapped.
%97 = select i1 %96, float 0.000000e+00, float 1.000000e+00
%98 = select i1 %96, float 1.000000e+00, float 0.000000e+00
; Pack (%97, %98) into two halves and export them together with the constant
; pair <0, 15360>; 15360 is 0x3C00, the IEEE half-precision bit pattern of 1.0.
; NOTE(review): the leading i32 0 / i32 15 and trailing i1 flags are presumably
; export target, channel enable mask, "done" and "vm" -- confirm against the
; llvm.amdgcn.exp.compr documentation.
%99 = call nsz <2 x half> @llvm.amdgcn.cvt.pkrtz(float %97, float %98) #0
%100 = bitcast <2 x half> %99 to <2 x i16>
call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 15, <2 x i16> %100, <2 x i16> <i16 0, i16 15360>, i1 true, i1 true) #2
ret void
}
; Attribute groups referenced above: #0, #1 and #2 are attached to the
; intrinsic declarations (and to the cvt.pkrtz / exp.compr call sites);
; #3 is attached to @wrapper.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind }
attributes #3 = { "no-signed-zeros-fp-math"="true" }
; !0 is an empty node used as the operand of !amdgpu.uniform and
; !invariant.load in @wrapper.
!0 = !{}
; !1 is the alias-scope list attached to the <4 x i32> load in @wrapper; it
; contains the single scope !2 ("main: argument 0"), which belongs to the
; domain !3 ("main").
!1 = !{!2}
!2 = distinct !{!2, !3, !"main: argument 0"}
!3 = distinct !{!3, !"main"}
More information about the llvm-commits
mailing list