[llvm] r288051 - [StructurizeCFG] Use range-based for loops.
Michel Dänzer via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 29 00:12:42 PST 2016
Hi Justin,
On 29/11/16 03:50 AM, Justin Lebar via llvm-commits wrote:
> Author: jlebar
> Date: Mon Nov 28 12:50:03 2016
> New Revision: 288051
>
> URL: http://llvm.org/viewvc/llvm-project?rev=288051&view=rev
> Log:
> [StructurizeCFG] Use range-based for loops.
>
> Reviewers: arsenm
>
> Subscribers: wdng, llvm-commits
>
> Differential Revision: https://reviews.llvm.org/D27000
This change caused an infinite loop in StructurizeCFG::rebuildSSA when
running the piglit test arb_shader_image_load_store-shader-mem-barrier
with the radeonsi driver. You can reproduce it by feeding the attached
LLVM IR to
llc -march=amdgcn -mcpu=kaveri
--
Earthling Michel Dänzer | http://www.amd.com
Libre software enthusiast | Mesa and X developer
-------------- next part --------------
; Module header and external declarations for the reproducer.
; The only function defined in this module is @wrapper; every declaration
; below is called from it.
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
; Called once in main_body; its <4 x float> result is split into four scalars.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0
; Called twice in if16.i (inside the loop).
; Function Attrs: nounwind readonly
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
; Called in if16.i (between the two image loads) and in else36.i (between the
; two image stores).
; Function Attrs: nounwind
declare void @llvm.amdgcn.s.waitcnt(i32) #2
; Called twice in else36.i (inside the loop).
; Function Attrs: nounwind
declare void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
; Called twice in main.exit, immediately before the function returns.
; Function Attrs: nounwind
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #2
; @wrapper - reproducer body. Per the accompanying report, feeding this module
; to `llc -march=amdgcn -mcpu=kaveri` hangs in StructurizeCFG::rebuildSSA after
; r288051. Do not simplify or renumber: the exact CFG shape is what matters.
;
; Arguments are unnamed, so they are implicitly numbered %0..%13:
;   %0-%5   pointer arguments (addrspace(2), byval)
;   %6-%9   i32 inreg
;   %10-%13 i32
; Only %3, %5, %6 and %10 are referenced in the body.
;
; Control-flow summary:
;   main_body -> loop11.i
;   loop11.i  -> main.exit (counter > 999) | endif15.i
;   endif15.i -> if16.i | else36.i          (condition %29 is loop-invariant)
;   if16.i    -> if28.i | endif46.i
;   if28.i    -> main.exit                  (second exit out of the loop)
;   else36.i  -> endif46.i
;   endif46.i -> loop11.i                   (back edge)
define amdgpu_vs void @wrapper([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [24 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) {
; Entry block: everything computed here is loop-invariant.
main_body:
%14 = add i32 %10, %6
%15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0
%16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !invariant.load !0
%17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %14)
; %18..%21 are the four lanes of the loaded input; all four are exported
; unchanged by the second export in main.exit.
%18 = extractelement <4 x float> %17, i32 0
%19 = extractelement <4 x float> %17, i32 1
%20 = extractelement <4 x float> %17, i32 2
%21 = extractelement <4 x float> %17, i32 3
; Derive two integer coordinates (%26, %27) from lanes 0 and 1.
%22 = fadd float %18, 1.000000e+00
%23 = fadd float %19, 1.000000e+00
%24 = fmul float %22, 1.280000e+02
%25 = fmul float %23, 3.200000e+01
%26 = fptosi float %24 to i32
%27 = fptosi float %25 to i32
; %29 (is %26 odd, via signed remainder) selects if16.i vs else36.i on every
; loop iteration.
%28 = srem i32 %26, 2
%29 = icmp eq i32 %28, 1
; %31 is %26 with the signed division by 2 undone by a shift; %36/%39 set
; bit 0 of that value.
%30 = sdiv i32 %26, 2
%31 = shl nsw i32 %30, 1
%32 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%33 = load <8 x i32>, <8 x i32> addrspace(2)* %32, align 32
; %35 = <%31, %27>: coordinate for the first image load.
%34 = insertelement <2 x i32> undef, i32 %31, i32 0
%35 = insertelement <2 x i32> %34, i32 %27, i32 1
; %38 = <%31 | 1, %27>: coordinate for the second image load.
%36 = or i32 %31, 1
%37 = insertelement <2 x i32> undef, i32 %36, i32 0
%38 = insertelement <2 x i32> %37, i32 %27, i32 1
; %39..%45 recompute the same expressions as %36..%38, %32..%33 and %34..%35;
; these copies feed the image stores in else36.i. The redundancy is part of
; the reproducer and is left as-is.
%39 = or i32 %31, 1
%40 = insertelement <2 x i32> undef, i32 %39, i32 0
%41 = insertelement <2 x i32> %40, i32 %27, i32 1
%42 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%43 = load <8 x i32>, <8 x i32> addrspace(2)* %42, align 32
%44 = insertelement <2 x i32> undef, i32 %31, i32 0
%45 = insertelement <2 x i32> %44, i32 %27, i32 1
br label %loop11.i
; Loop header. %46 is the iteration counter (starts at 0, incremented in
; endif46.i); %TEMP4.x.0.i carries the float bitcast of the incremented
; counter from the previous iteration (0.0 on entry).
loop11.i: ; preds = %endif46.i, %main_body
%46 = phi i32 [ 0, %main_body ], [ %68, %endif46.i ]
%TEMP4.x.0.i = phi float [ 0.000000e+00, %main_body ], [ %69, %endif46.i ]
; First loop exit: leave once the counter exceeds 999 (signed compare).
%47 = icmp sgt i32 %46, 999
br i1 %47, label %main.exit, label %endif15.i
; Dispatch on the loop-invariant condition computed in main_body.
endif15.i: ; preds = %loop11.i
br i1 %29, label %if16.i, label %else36.i
; Load lane 0 of two image texels and compare their bit patterns as unsigned
; integers.
if16.i: ; preds = %endif15.i
%48 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %35, <8 x i32> %33, i32 15, i1 true, i1 false, i1 false, i1 false)
%49 = extractelement <4 x float> %48, i32 0
call void @llvm.amdgcn.s.waitcnt(i32 3952)
%50 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %38, <8 x i32> %33, i32 15, i1 true, i1 false, i1 false, i1 false)
%51 = extractelement <4 x float> %50, i32 0
%52 = bitcast float %51 to i32
%53 = bitcast float %49 to i32
; If the second value is below the first, take the early exit through if28.i;
; otherwise continue the loop.
%54 = icmp ult i32 %52, %53
br i1 %54, label %if28.i, label %endif46.i
; Second loop exit: pack (first << 16) | second and pass it to main.exit
; through the %70 phi.
if28.i: ; preds = %if16.i
%55 = bitcast float %49 to i32
%56 = shl i32 %55, 16
%57 = bitcast float %51 to i32
%58 = or i32 %56, %57
%59 = bitcast i32 %58 to float
br label %main.exit
; Store a splat of %TEMP4.x.0.i to two image coordinates (%41, then %45).
else36.i: ; preds = %endif15.i
%60 = insertelement <4 x float> undef, float %TEMP4.x.0.i, i32 0
%61 = insertelement <4 x float> %60, float %TEMP4.x.0.i, i32 1
%62 = insertelement <4 x float> %61, float %TEMP4.x.0.i, i32 2
%63 = insertelement <4 x float> %62, float %TEMP4.x.0.i, i32 3
call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %63, <2 x i32> %41, <8 x i32> %43, i32 15, i1 true, i1 false, i1 false, i1 false)
call void @llvm.amdgcn.s.waitcnt(i32 3952)
; %64..%67 rebuild the same splat as %60..%63 for the second store.
%64 = insertelement <4 x float> undef, float %TEMP4.x.0.i, i32 0
%65 = insertelement <4 x float> %64, float %TEMP4.x.0.i, i32 1
%66 = insertelement <4 x float> %65, float %TEMP4.x.0.i, i32 2
%67 = insertelement <4 x float> %66, float %TEMP4.x.0.i, i32 3
call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %67, <2 x i32> %45, <8 x i32> %43, i32 15, i1 true, i1 false, i1 false, i1 false)
br label %endif46.i
; Loop latch: bump the counter and feed both header phis.
endif46.i: ; preds = %else36.i, %if16.i
%68 = add i32 %46, 1
%69 = bitcast i32 %68 to float
br label %loop11.i
; Common exit. %70 is the packed value from if28.i on the early exit, or a
; fixed constant when the loop ran to completion.
main.exit: ; preds = %loop11.i, %if28.i
%70 = phi float [ %59, %if28.i ], [ 0x36F0800000000000, %loop11.i ]
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %70, float 0.000000e+00, float 0.000000e+00, float 0x36A0000000000000)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %18, float %19, float %20, float %21)
ret void
}
; Attribute groups referenced by the declarations in this module:
;   #0 - @llvm.SI.vs.load.input
;   #1 - @llvm.amdgcn.image.load.v4f32.v2i32.v8i32
;   #2 - @llvm.amdgcn.s.waitcnt, @llvm.amdgcn.image.store.v4f32.v2i32.v8i32,
;        @llvm.SI.export
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind }
; Empty metadata node; used as the operand of !amdgpu.uniform and
; !invariant.load on %15 / %16 in @wrapper's main_body.
!0 = !{}
More information about the llvm-commits
mailing list