[llvm] r217108 - R600/SI: Try to keep i32 mul on SALU

Thu Sep 4 00:05:29 PDT 2014

On 04.09.2014 08:24, Matt Arsenault wrote:
> Author: arsenm
> Date: Wed Sep  3 18:24:35 2014
> New Revision: 217108
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=217108&view=rev
> Log:
> R600/SI: Try to keep i32 mul on SALU

This change broke two piglit tests for me (do you test your changes with
piglit before committing them?):

spec/EXT_transform_feedback/alignment 0 (from the gpu.py profile):

Warning: Illegal instruction detected: Operand has incorrect register class.
  %VGPR4<def> = S_MOV_B32 741092396

Program/Execute/get-local-id (from the quick_cl.py profile):

Warning: Illegal instruction detected: Operand has incorrect register class.
  %VGPR5<def> = S_MOV_B32 100

The IR for the failing shaders is attached.

-- 
Earthling Michel Dänzer            |                  http://www.amd.com
Libre software enthusiast          |                Mesa and X developer
-------------- next part --------------
; ModuleID = 'tgsi'

; Reproducer shader from the 'tgsi' module. It loads one input vector, derives
; eleven i32 values from its first component, conditionally writes them out
; with llvm.SI.tbuffer.store.* and finally issues four llvm.SI.export calls.
;
; All 14 parameters are unnamed, so they are %0..%13 and the first instruction
; result is %14. Parameters read by the body:
;   %3  - pointer to [6 x <16 x i8>];  element 2 is loaded into %15
;   %4  - pointer to [16 x <16 x i8>]; element 0 is loaded into %17
;   %5, %10 - summed to form the index passed to llvm.SI.vs.load.input
;   %7  - bits 16..22 give the bound compared against llvm.SI.tid()
;   %8, %9  - used to compute the i32 %64 passed to every tbuffer store
; NOTE(review): the roles of these arguments (descriptors, vertex ids, stream-out
; state, ...) are not visible here - confirm against the Mesa side that
; generated this IR.
define void @main([17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [6 x <16 x i8>] addrspace(2)* inreg, [16 x <16 x i8>] addrspace(2)* inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  ; %15: <16 x i8> loaded from element 2 of %3; first operand of every
  ; tbuffer store in if-true-block.
  %14 = getelementptr [6 x <16 x i8>] addrspace(2)* %3, i64 0, i32 2
  %15 = load <16 x i8> addrspace(2)* %14, !tbaa !0
  ; %17: <16 x i8> loaded from element 0 of %4; first operand of the input load.
  %16 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i64 0, i32 0
  %17 = load <16 x i8> addrspace(2)* %16, !tbaa !0
  %18 = add i32 %5, %10
  %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %17, i32 0, i32 %18)
  ; Only component 0 of the loaded vector is used; it is reinterpreted as i32.
  %20 = extractelement <4 x float> %19, i32 0
  %21 = bitcast float %20 to i32
  ; 741092396 (0x2C2C2C2C) is the immediate named in the reported
  ; "%VGPR4<def> = S_MOV_B32 741092396" diagnostic, so this multiply is
  ; presumably the instruction that is mis-selected.
  %22 = mul i32 %21, 741092396
  %23 = bitcast i32 %22 to float
  ; Everything below up to %56 is the product %22 plus a distinct constant.
  ; Each value round-trips through float via bitcast, which does not change
  ; the bits.
  ; %32..%35 = product + {404298267, 471670303, 539042339, 606414375}
  %24 = bitcast float %23 to i32
  %25 = add i32 404298267, %24
  %26 = bitcast float %23 to i32
  %27 = add i32 471670303, %26
  %28 = bitcast float %23 to i32
  %29 = add i32 539042339, %28
  %30 = bitcast float %23 to i32
  %31 = add i32 606414375, %30
  %32 = bitcast i32 %25 to float
  %33 = bitcast i32 %27 to float
  %34 = bitcast i32 %29 to float
  %35 = bitcast i32 %31 to float
  ; %38 = product + 66051
  %36 = bitcast float %23 to i32
  %37 = add i32 66051, %36
  %38 = bitcast i32 %37 to float
  ; %43, %44 = product + {67438087, 134810123}
  %39 = bitcast float %23 to i32
  %40 = add i32 67438087, %39
  %41 = bitcast float %23 to i32
  %42 = add i32 134810123, %41
  %43 = bitcast i32 %40 to float
  %44 = bitcast i32 %42 to float
  ; %51..%53 = product + {202182159, 269554195, 336926231}
  %45 = bitcast float %23 to i32
  %46 = add i32 202182159, %45
  %47 = bitcast float %23 to i32
  %48 = add i32 269554195, %47
  %49 = bitcast float %23 to i32
  %50 = add i32 336926231, %49
  %51 = bitcast i32 %46 to float
  %52 = bitcast i32 %48 to float
  %53 = bitcast i32 %50 to float
  ; %56 = product + 673786411
  %54 = bitcast float %23 to i32
  %55 = add i32 673786411, %54
  %56 = bitcast i32 %55 to float
  ; Guard for the stores: run if-true-block only when llvm.SI.tid() is
  ; (unsigned) less than the 7-bit field in bits 16..22 of %7.
  %57 = lshr i32 %7, 16
  %58 = and i32 %57, 127
  %59 = call i32 @llvm.SI.tid()
  %60 = icmp ult i32 %59, %58
  br i1 %60, label %if-true-block, label %endif-block

if-true-block:                                    ; preds = %main_body
  ; %64 = (%8 + tid) * 44 + %9 * 4, passed as the fourth operand of each store.
  ; NOTE(review): 44 equals the largest store constant below (40) plus 4, so
  ; this looks like a per-thread record stride in bytes - confirm.
  %61 = add i32 %8, %59
  %62 = mul i32 %9, 4
  %63 = mul i32 %61, 44
  %64 = add i32 %63, %62
  ; Store 1: scalar %38 (sixth operand 0).
  %65 = bitcast float %38 to i32
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %15, i32 %65, i32 1, i32 %64, i32 0, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  ; Store 2: two-element vector <%43, %44> (sixth operand 4).
  %66 = bitcast float %43 to i32
  %67 = bitcast float %44 to i32
  %68 = insertelement <2 x i32> undef, i32 %66, i32 0
  %69 = insertelement <2 x i32> %68, i32 %67, i32 1
  call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> %15, <2 x i32> %69, i32 2, i32 %64, i32 0, i32 4, i32 11, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  ; Store 3: <%51, %52, %53, undef> - only three lanes of the <4 x i32> are
  ; populated and the third operand is 3 (sixth operand 12).
  %70 = bitcast float %51 to i32
  %71 = bitcast float %52 to i32
  %72 = bitcast float %53 to i32
  %73 = insertelement <4 x i32> undef, i32 %70, i32 0
  %74 = insertelement <4 x i32> %73, i32 %71, i32 1
  %75 = insertelement <4 x i32> %74, i32 %72, i32 2
  call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> %15, <4 x i32> %75, i32 3, i32 %64, i32 0, i32 12, i32 13, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  ; Store 4: full four-element vector <%32, %33, %34, %35> (sixth operand 24).
  %76 = bitcast float %32 to i32
  %77 = bitcast float %33 to i32
  %78 = bitcast float %34 to i32
  %79 = bitcast float %35 to i32
  %80 = insertelement <4 x i32> undef, i32 %76, i32 0
  %81 = insertelement <4 x i32> %80, i32 %77, i32 1
  %82 = insertelement <4 x i32> %81, i32 %78, i32 2
  %83 = insertelement <4 x i32> %82, i32 %79, i32 3
  call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> %15, <4 x i32> %83, i32 4, i32 %64, i32 0, i32 24, i32 14, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  ; Store 5: scalar %56 (sixth operand 40).
  %84 = bitcast float %56 to i32
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %15, i32 %84, i32 1, i32 %64, i32 0, i32 40, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  br label %endif-block

endif-block:                                      ; preds = %main_body, %if-true-block
  ; Executed on both paths. The first three exports carry the same eleven
  ; computed values (fourth operand 32, 33, 34); the last export carries four
  ; zeros with fourth operand 12 and is the only one whose third operand is 1.
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %32, float %33, float %34, float %35)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %43, float %44, float %38, float %56)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %51, float %52, float %53, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00)
  ret void
}

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare i32 @llvm.SI.tid() #2

declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)

declare void @llvm.SI.tbuffer.store.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)

declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
-------------- next part --------------
; ModuleID = 'radeon'
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "r600--"

; Reproducer kernel from the 'radeon' module. It stores one i32 into %out:
;   index = gx + (gy << 2) + (gz << 4)
;           where g{x,y,z} = local.size * tgid + tidig for that dimension
;   value = tidig.x * 100 + tidig.y * 10 + tidig.z
; %out is a pointer to i32 in addrspace(1); the function returns nothing.
; NOTE(review): the g{x,y,z} expressions look like per-dimension global ids and
; the shifts by 2 and 4 like a 4x4xN layout - confirm against the OpenCL source
; of the piglit get-local-id test.
; Function Attrs: nounwind
define void @fill3d(i32 addrspace(1)* nocapture %out) #0 {
entry:
  ; x dimension: %add.i = local.size.x * tgid.x + tidig.x
  %x.i.i = tail call i32 @llvm.r600.read.tgid.x() #1
  %x.i12.i = tail call i32 @llvm.r600.read.local.size.x() #1
  %mul26.i = mul i32 %x.i12.i, %x.i.i
  %x.i4.i = tail call i32 @llvm.r600.read.tidig.x() #1
  %add.i = add i32 %x.i4.i, %mul26.i
  ; y dimension: %add.i13 = local.size.y * tgid.y + tidig.y
  %y.i.i = tail call i32 @llvm.r600.read.tgid.y() #1
  %y.i14.i = tail call i32 @llvm.r600.read.local.size.y() #1
  %mul30.i = mul i32 %y.i14.i, %y.i.i
  %y.i6.i = tail call i32 @llvm.r600.read.tidig.y() #1
  %add.i13 = add i32 %y.i6.i, %mul30.i
  ; Fold y into the index, scaled by 4 (shift left by 2).
  %mul = shl i32 %add.i13, 2
  %add = add i32 %add.i, %mul
  ; z dimension: %add.i12 = local.size.z * tgid.z + tidig.z
  %z.i.i = tail call i32 @llvm.r600.read.tgid.z() #1
  %z.i16.i = tail call i32 @llvm.r600.read.local.size.z() #1
  %mul33.i = mul i32 %z.i16.i, %z.i.i
  %z.i8.i = tail call i32 @llvm.r600.read.tidig.z() #1
  %add.i12 = add i32 %z.i8.i, %mul33.i
  ; Fold z into the index, scaled by 16 (shift left by 4); %add4 is the final
  ; element index.
  %mul3 = shl i32 %add.i12, 4
  %add4 = add i32 %add, %mul3
  ; Value to store. The constant 100 is the immediate named in the reported
  ; "%VGPR5<def> = S_MOV_B32 100" diagnostic, so the first multiply below is
  ; presumably the instruction that is mis-selected.
  %mul6 = mul i32 %x.i4.i, 100
  %mul8 = mul i32 %y.i6.i, 10
  %add9 = add i32 %mul8, %mul6
  %add11 = add i32 %add9, %z.i8.i
  ; Sign-extend the i32 index to i64 for the addrspace(1) GEP, then store.
  %0 = sext i32 %add4 to i64
  %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i64 %0
  store i32 %add11, i32 addrspace(1)* %arrayidx, align 4, !tbaa !4
  ret void
}

; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tgid.x() #1

; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tgid.y() #1

; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tgid.z() #1

; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.local.size.x() #1

; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.local.size.y() #1

; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.local.size.z() #1

; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tidig.x() #1

; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tidig.y() #1

; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tidig.z() #1

attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }

!opencl.kernels = !{!0, !1, !2}
!llvm.ident = !{!3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3}

!0 = metadata !{null}
!1 = metadata !{null}
!2 = metadata !{void (i32 addrspace(1)*)* @fill3d}
!3 = metadata !{metadata !"clang version 3.6.0 (http://llvm.org/git/clang.git 7a02234a7ba6e785d927d04ff19e66b1ea625e8b) (llvm/trunk 217119)"}
!4 = metadata !{metadata !5, metadata !5, i64 0}
!5 = metadata !{metadata !"int", metadata !6, i64 0}
!6 = metadata !{metadata !"omnipotent char", metadata !7, i64 0}
!7 = metadata !{metadata !"Simple C/C++ TBAA"}