[llvm] r288051 - [StructurizeCFG] Use range-based for loops.

Tue Nov 29 00:12:42 PST 2016

Hi Justin,

On 29/11/16 03:50 AM, Justin Lebar via llvm-commits wrote:
> Author: jlebar
> Date: Mon Nov 28 12:50:03 2016
> New Revision: 288051
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=288051&view=rev
> Log:
> [StructurizeCFG] Use range-based for loops.
> 
> Reviewers: arsenm
> 
> Subscribers: wdng, llvm-commits
> 
> Differential Revision: https://reviews.llvm.org/D27000

This change caused an infinite loop in StructurizeCFG::rebuildSSA when
running the piglit test arb_shader_image_load_store-shader-mem-barrier
with the radeonsi driver. You can reproduce it by feeding the attached
LLVM IR to

llc -march=amdgcn -mcpu=kaveri

-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
-------------- next part --------------
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

; The declarations below are the intrinsics called by @wrapper, the only
; function defined in this module.

; Called once in main_body to fetch the <4 x float> vertex input.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0

; Called twice in block if16.i.
; Function Attrs: nounwind readonly
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1

; Called between the two image loads (if16.i) and between the two image
; stores (else36.i).
; Function Attrs: nounwind
declare void @llvm.amdgcn.s.waitcnt(i32) #2

; Called twice in block else36.i.
; Function Attrs: nounwind
declare void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2

; Called twice in block main.exit, just before the return.
; Function Attrs: nounwind
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #2

; Reproducer for the StructurizeCFG::rebuildSSA hang described in the report.
;
; Entry point using the amdgpu_vs calling convention. The arguments are
; unnamed and therefore numbered %0-%13: %0-%5 are the addrspace(2) pointers,
; %6-%9 the "inreg" i32s and %10-%13 the plain i32s. Only %3, %5, %6 and %10
; are referenced in the body.
;
; Control-flow shape:
;   main_body -> loop11.i (loop header)
;   loop11.i  -> main.exit (counter > 999) | endif15.i
;   endif15.i -> if16.i | else36.i           (loop-invariant condition %29)
;   if16.i    -> if28.i | endif46.i
;   if28.i    -> main.exit                   (second exit out of the loop)
;   else36.i  -> endif46.i
;   endif46.i -> loop11.i                    (back edge)
; NOTE(review): do not simplify or de-duplicate instructions here; unnamed
; values are numbered by position, and the exact CFG is presumably what
; triggers the reported hang.
define amdgpu_vs void @wrapper([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [24 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) {
main_body:
  ; Loop-invariant setup. Load element 0 of the array behind %5 and use it,
  ; with index %10 + %6, to fetch the vertex input %17; %18-%21 are its four
  ; components.
  %14 = add i32 %10, %6
  %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0
  %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !invariant.load !0
  %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %14)
  %18 = extractelement <4 x float> %17, i32 0
  %19 = extractelement <4 x float> %17, i32 1
  %20 = extractelement <4 x float> %17, i32 2
  %21 = extractelement <4 x float> %17, i32 3
  ; %26 = (int)((x + 1.0) * 128.0), %27 = (int)((y + 1.0) * 32.0).
  %22 = fadd float %18, 1.000000e+00
  %23 = fadd float %19, 1.000000e+00
  %24 = fmul float %22, 1.280000e+02
  %25 = fmul float %23, 3.200000e+01
  %26 = fptosi float %24 to i32
  %27 = fptosi float %25 to i32
  ; %29 is the loop-invariant condition tested in endif15.i:
  ; (%26 srem 2) == 1.
  %28 = srem i32 %26, 2
  %29 = icmp eq i32 %28, 1
  ; %31 = (%26 sdiv 2) << 1.
  %30 = sdiv i32 %26, 2
  %31 = shl nsw i32 %30, 1
  ; Load element 0 of the array behind %3, and build the <2 x i32> operands
  ; later passed to the image load/store calls:
  ;   %35 = <%31, %27>, %38 = <%31 | 1, %27>.
  %32 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
  %33 = load <8 x i32>, <8 x i32> addrspace(2)* %32, align 32
  %34 = insertelement <2 x i32> undef, i32 %31, i32 0
  %35 = insertelement <2 x i32> %34, i32 %27, i32 1
  %36 = or i32 %31, 1
  %37 = insertelement <2 x i32> undef, i32 %36, i32 0
  %38 = insertelement <2 x i32> %37, i32 %27, i32 1
  ; %39-%45 repeat the computations of %36-%38 and %32-%35 with identical
  ; operands; these copies feed the image stores in else36.i.
  %39 = or i32 %31, 1
  %40 = insertelement <2 x i32> undef, i32 %39, i32 0
  %41 = insertelement <2 x i32> %40, i32 %27, i32 1
  %42 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
  %43 = load <8 x i32>, <8 x i32> addrspace(2)* %42, align 32
  %44 = insertelement <2 x i32> undef, i32 %31, i32 0
  %45 = insertelement <2 x i32> %44, i32 %27, i32 1
  br label %loop11.i

; Loop header. %46 is the iteration counter (0 on entry, %68 from the latch);
; %TEMP4.x.0.i is 0.0 on entry and %69 (the bits of the incremented counter)
; on later iterations. The loop exits to main.exit once %46 > 999 (signed).
loop11.i:                                         ; preds = %endif46.i, %main_body
  %46 = phi i32 [ 0, %main_body ], [ %68, %endif46.i ]
  %TEMP4.x.0.i = phi float [ 0.000000e+00, %main_body ], [ %69, %endif46.i ]
  %47 = icmp sgt i32 %46, 999
  br i1 %47, label %main.exit, label %endif15.i

; Selects the load path (if16.i) or the store path (else36.i) using the
; loop-invariant %29 computed in main_body.
endif15.i:                                        ; preds = %loop11.i
  br i1 %29, label %if16.i, label %else36.i

; Load path: two image loads (operands %35 and %38) separated by an
; s.waitcnt call. Component 0 of each result is reinterpreted as i32 and
; compared unsigned; if the second is less than the first, control goes to
; if28.i, otherwise to the latch endif46.i.
if16.i:                                           ; preds = %endif15.i
  %48 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %35, <8 x i32> %33, i32 15, i1 true, i1 false, i1 false, i1 false)
  %49 = extractelement <4 x float> %48, i32 0
  call void @llvm.amdgcn.s.waitcnt(i32 3952)
  %50 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %38, <8 x i32> %33, i32 15, i1 true, i1 false, i1 false, i1 false)
  %51 = extractelement <4 x float> %50, i32 0
  %52 = bitcast float %51 to i32
  %53 = bitcast float %49 to i32
  %54 = icmp ult i32 %52, %53
  br i1 %54, label %if28.i, label %endif46.i

; Second loop exit: computes %59 = bits(%49) << 16 | bits(%51), reinterpreted
; as float, and branches straight to main.exit without passing through the
; latch.
if28.i:                                           ; preds = %if16.i
  %55 = bitcast float %49 to i32
  %56 = shl i32 %55, 16
  %57 = bitcast float %51 to i32
  %58 = or i32 %56, %57
  %59 = bitcast i32 %58 to float
  br label %main.exit

; Store path: builds a <4 x float> with %TEMP4.x.0.i in every lane and
; stores it twice, first with operand %41 and then with operand %45, with an
; s.waitcnt call in between. %64-%67 rebuild the same vector as %60-%63.
else36.i:                                         ; preds = %endif15.i
  %60 = insertelement <4 x float> undef, float %TEMP4.x.0.i, i32 0
  %61 = insertelement <4 x float> %60, float %TEMP4.x.0.i, i32 1
  %62 = insertelement <4 x float> %61, float %TEMP4.x.0.i, i32 2
  %63 = insertelement <4 x float> %62, float %TEMP4.x.0.i, i32 3
  call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %63, <2 x i32> %41, <8 x i32> %43, i32 15, i1 true, i1 false, i1 false, i1 false)
  call void @llvm.amdgcn.s.waitcnt(i32 3952)
  %64 = insertelement <4 x float> undef, float %TEMP4.x.0.i, i32 0
  %65 = insertelement <4 x float> %64, float %TEMP4.x.0.i, i32 1
  %66 = insertelement <4 x float> %65, float %TEMP4.x.0.i, i32 2
  %67 = insertelement <4 x float> %66, float %TEMP4.x.0.i, i32 3
  call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %67, <2 x i32> %45, <8 x i32> %43, i32 15, i1 true, i1 false, i1 false, i1 false)
  br label %endif46.i

; Loop latch: increments the counter and reinterprets the new value as a
; float; both feed the phis in loop11.i via the back edge.
endif46.i:                                        ; preds = %else36.i, %if16.i
  %68 = add i32 %46, 1
  %69 = bitcast i32 %68 to float
  br label %loop11.i

; Common exit. %70 is %59 when arriving from if28.i and the constant
; 0x36F0800000000000 when arriving from the loop header. The first export
; passes %70 as its first float operand; the second export passes the four
; components of the vertex input (%18-%21).
main.exit:                                        ; preds = %loop11.i, %if28.i
  %70 = phi float [ %59, %if28.i ], [ 0x36F0800000000000, %loop11.i ]
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %70, float 0.000000e+00, float 0.000000e+00, float 0x36A0000000000000)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %18, float %19, float %20, float %21)
  ret void
}

; Attribute groups referenced by the intrinsic declarations in this module:
; #0 by llvm.SI.vs.load.input, #1 by the image load, and #2 by s.waitcnt,
; the image store and llvm.SI.export.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind }

; Empty metadata node, attached as !amdgpu.uniform and !invariant.load to the
; first getelementptr/load pair in @wrapper's main_body block.
!0 = !{}