[PATCH] D60846: [ValueTracking] Improve isKnowNonZero for Ints
Samuel Pitoiset via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed May 8 01:42:03 PDT 2019
hakzsam added a comment.
Hi folks,
This patch introduces a regression in RADV (the open-source Vulkan driver in Mesa) that causes some CTS tests to fail. Here's the list of failures:
- dEQP-VK.glsl.functions.control_flow.mixed_return_break_continue_fragment
- dEQP-VK.glsl.functions.control_flow.mixed_return_break_continue_vertex
Here's the LLVM IR generated by Mesa (before any optimization passes):
; Module-level header of the reproducer: module name, data layout and target.
; ModuleID = 'mesa-shader'
source_filename = "mesa-shader"
; Data layout fields relevant to the function below (per the LLVM LangRef):
;   e       - little-endian
;   p/pN    - pointer size:alignment for the default and numbered address spaces
;   n32:64  - native integer widths
;   S32     - natural stack alignment, in bits
;   A5      - allocas are created in address space 5 (see the addrspace(5) allocas)
;   ni:7    - address space 7 is non-integral
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
target triple = "amdgcn-mesa-mesa3d"
; Shader entry point using the amdgpu_ps calling convention. The 18 parameters
; are unnamed and therefore numbered %0..%17; the body only reads %0 (the
; addrspace(6) pointer) and %1 (the inreg i32 handed to llvm.amdgcn.interp.mov).
;
; Overall shape: run a small counted loop that conditionally negates a float
; (carried as its i32 bit pattern), compare the result against a value loaded
; through %0 with a tolerance, and export the outcome as packed halves.
define amdgpu_ps void @main([0 x i8] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, i32, i32) #0 {
main_body:
; %temp..%temp7 are allocated but never referenced again in this function.
%temp7 = alloca float, addrspace(5)
%temp6 = alloca float, addrspace(5)
%temp5 = alloca float, addrspace(5)
%temp4 = alloca float, addrspace(5)
%temp3 = alloca float, addrspace(5)
%temp2 = alloca float, addrspace(5)
%temp1 = alloca float, addrspace(5)
%temp = alloca float, addrspace(5)
; %18..%21 are the four slots the output vector is stored to and reloaded from
; just before the export at the end of the function.
%18 = alloca float, addrspace(5)
%19 = alloca float, addrspace(5)
%20 = alloca float, addrspace(5)
%21 = alloca float, addrspace(5)
; %22/%23 are computed but not used anywhere below.
%22 = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr() #2
%23 = bitcast i8 addrspace(4)* %22 to [0 x <4 x i32>] addrspace(4)*
; Four interp.mov calls that differ only in their second operand (0..3).
; Only %25 (from the first call) is consumed later; %27, %29 and %31 are dead.
%24 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %1) #2
%25 = bitcast float %24 to i32
%26 = call float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %1) #2
%27 = bitcast float %26 to i32
%28 = call float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %1) #2
%29 = bitcast float %28 to i32
%30 = call float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %1) #2
%31 = bitcast float %30 to i32
br label %loop1
; Loop header. %32 is the loop-carried value (initially the bits of %24);
; %33 is the iteration counter, starting at 0 and incremented in %endif5.
; Throughout the loop, each condition is expanded as
;   icmp -> select -1/0 -> icmp ne 0
; i.e. the i1 is widened to an all-ones/zero i32 and then tested again.
; Following the counter from 0 in this unoptimized IR: iteration 0 goes through
; %else12 (value unchanged), iteration 1 goes through %else10/%endif6 (value
; negated), and iteration 2 reaches %else8 and leaves the loop with
; %52 = -1 and %51 = %32.
loop1: ; preds = %endif5, %main_body
%32 = phi i32 [ %25, %main_body ], [ %49, %endif5 ]
%33 = phi i32 [ 0, %main_body ], [ %50, %endif5 ]
; Exit to %endloop1 (via %if2) once the counter is >= 6 (signed compare).
%34 = icmp sge i32 %33, 6
%35 = select i1 %34, i32 -1, i32 0
%36 = icmp ne i32 %35, 0
br i1 %36, label %if2, label %else3
if2: ; preds = %loop1
br label %endloop1
else3: ; preds = %loop1
br label %endif2
endif2: ; preds = %else3
; counter != 0 -> %if5, otherwise %else12 (skip straight to the increment).
%37 = icmp ne i32 %33, 0
%38 = select i1 %37, i32 -1, i32 0
%39 = icmp ne i32 %38, 0
br i1 %39, label %if5, label %else12
if5: ; preds = %endif2
; counter != 1 -> %if6, otherwise %else10 (which falls into the negation).
%40 = icmp ne i32 %33, 1
%41 = select i1 %40, i32 -1, i32 0
%42 = icmp ne i32 %41, 0
br i1 %42, label %if6, label %else10
if6: ; preds = %if5
; counter == 3 -> %if7, otherwise %else8. Both successors leave the loop; they
; differ only in the values they feed to the phis in %endloop1.
%43 = icmp eq i32 %33, 3
%44 = select i1 %43, i32 -1, i32 0
%45 = icmp ne i32 %44, 0
br i1 %45, label %if7, label %else8
if7: ; preds = %if6
br label %endloop1
else8: ; preds = %if6
br label %endloop1
endif7: ; No predecessors!
br label %endif6
else10: ; preds = %if5
br label %endif6
endif6: ; preds = %else10, %endif7
; Negate the carried value as a float (fsub -0.0, x) and keep it as i32 bits.
%46 = bitcast i32 %32 to float
%47 = fsub float -0.000000e+00, %46
%48 = bitcast float %47 to i32
br label %endif5
else12: ; preds = %endif2
br label %endif5
endif5: ; preds = %else12, %endif6
; Merge the unchanged (%else12) or negated (%endif6) value, bump the counter,
; and take the back edge.
%49 = phi i32 [ %32, %else12 ], [ %48, %endif6 ]
%50 = add i32 %33, 1
br label %loop1
endloop1: ; preds = %else8, %if7, %if2
; %52 is -1 only when arriving from %else8, in which case %51 carries %32;
; on the %if2 and %if7 edges %51 is undef and %52 is 0.
%51 = phi i32 [ undef, %if2 ], [ undef, %if7 ], [ %32, %else8 ]
%52 = phi i32 [ 0, %if2 ], [ 0, %if7 ], [ -1, %else8 ]
; Pick %51 when %52 != 0, else 1065353216 (0x3F800000, the bit pattern of 1.0f).
%53 = icmp ne i32 %52, 0
%54 = select i1 %53, i32 %51, i32 1065353216
; Load a <4 x i32> through %0 and use it as the first operand of
; s.buffer.load.f32 to fetch the float %58 that the result is compared against.
%55 = getelementptr [0 x i8], [0 x i8] addrspace(6)* %0, i32 0, i32 0
%56 = bitcast i8 addrspace(6)* %55 to <4 x i32> addrspace(6)*, !amdgpu.uniform !0
%57 = load <4 x i32>, <4 x i32> addrspace(6)* %56, !invariant.load !0
%58 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %57, i32 0, i32 0) #2
; The remaining arithmetic is interleaved with float<->i32 bitcast round trips.
; %68 = fabs(%54-as-float + (-%58)), i.e. the absolute difference to %58.
%59 = bitcast float %58 to i32
%60 = bitcast i32 %59 to float
%61 = fsub float -0.000000e+00, %60
%62 = bitcast float %61 to i32
%63 = bitcast i32 %54 to float
%64 = bitcast i32 %62 to float
%65 = fadd float %63, %64
%66 = bitcast float %65 to i32
%67 = bitcast i32 %66 to float
%68 = call float @llvm.fabs.f32(float %67) #2
%69 = bitcast float %68 to i32
; %77 = C * fabs(%58) + C, with C = 0x3FA99999A0000000 (approximately 0.05).
%70 = bitcast i32 %59 to float
%71 = call float @llvm.fabs.f32(float %70) #2
%72 = bitcast float %71 to i32
%73 = bitcast i32 %72 to float
%74 = fmul float 0x3FA99999A0000000, %73
%75 = bitcast float %74 to i32
%76 = bitcast i32 %75 to float
%77 = fadd float %76, 0x3FA99999A0000000
%78 = bitcast float %77 to i32
%79 = bitcast i32 %78 to float
%80 = bitcast i32 %69 to float
; Ordered compare: tolerance (%79) >= absolute difference (%80). The -1/0 mask
; ANDed with 1065353216 yields the bits of 1.0f when true and 0 when false.
%81 = fcmp oge float %79, %80
%82 = select i1 %81, i32 -1, i32 0
%83 = and i32 %82, 1065353216
%84 = bitcast i32 %83 to float
%85 = bitcast float %84 to i32
; Build <r, r, r, 1.0f> where r is the comparison result from above.
%86 = insertelement <4 x i32> undef, i32 %85, i32 0
%87 = insertelement <4 x i32> %86, i32 %85, i32 1
%88 = insertelement <4 x i32> %87, i32 %85, i32 2
%89 = insertelement <4 x i32> %88, i32 1065353216, i32 3
%90 = bitcast <4 x i32> %89 to <4 x float>
; Store each lane to its slot (%21, %20, %19, %18 for lanes 0..3) and reload.
%91 = extractelement <4 x float> %90, i32 0
store float %91, float addrspace(5)* %21
%92 = extractelement <4 x float> %90, i32 1
store float %92, float addrspace(5)* %20
%93 = extractelement <4 x float> %90, i32 2
store float %93, float addrspace(5)* %19
%94 = extractelement <4 x float> %90, i32 3
store float %94, float addrspace(5)* %18
%95 = load float, float addrspace(5)* %21
%96 = load float, float addrspace(5)* %20
%97 = load float, float addrspace(5)* %19
%98 = load float, float addrspace(5)* %18
; Pack the four floats into two <2 x half> values and pass them, reinterpreted
; as <2 x i16>, to the compressed export intrinsic.
%99 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %95, float %96) #2
%100 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %97, float %98) #2
%101 = bitcast <2 x half> %99 to <2 x i16>
%102 = bitcast <2 x half> %100 to <2 x i16>
call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 5, <2 x i16> %101, <2 x i16> %102, i1 true, i1 true) #3
ret void
}
; Declarations of the intrinsics called from @main, followed by the attribute
; groups (#0..#3) and the metadata node (!0) referenced in this module.
; Function Attrs: nounwind readnone speculatable
declare i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr() #1
; Function Attrs: nounwind readnone speculatable
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg) #2
; Function Attrs: nounwind readnone speculatable
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone speculatable
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
; Function Attrs: nounwind
declare void @llvm.amdgcn.exp.compr.v2i16(i32 immarg, i32 immarg, <2 x i16>, <2 x i16>, i1 immarg, i1 immarg) #3
; #0 is the attribute group attached to the definition of @main.
attributes #0 = { "amdgpu-32bit-address-high-bits"="0xffff8000" }
; #1 is used by the intrinsic declarations; #2 and #3 also appear on call sites.
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind readnone }
attributes #3 = { nounwind }
; Empty metadata node used as the operand of !amdgpu.uniform and !invariant.load.
!0 = !{}
Can you have a look?
Thanks,
Samuel.
Repository:
rL LLVM
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D60846/new/
https://reviews.llvm.org/D60846
More information about the llvm-commits
mailing list