[llvm] r217108 - R600/SI: Try to keep i32 mul on SALU
Michel Dänzer
michel at daenzer.net
Thu Sep 4 00:05:29 PDT 2014
On 04.09.2014 08:24, Matt Arsenault wrote:
> Author: arsenm
> Date: Wed Sep 3 18:24:35 2014
> New Revision: 217108
>
> URL: http://llvm.org/viewvc/llvm-project?rev=217108&view=rev
> Log:
> R600/SI: Try to keep i32 mul on SALU
This change broke two piglit tests for me (do you test your changes with
piglit before committing them?):
spec/EXT_transform_feedback/alignment 0 (from the gpu.py profile):
Warning: Illegal instruction detected: Operand has incorrect register class.
%VGPR4<def> = S_MOV_B32 741092396
Program/Execute/get-local-id (from the quick_cl.py profile):
Warning: Illegal instruction detected: Operand has incorrect register class.
%VGPR5<def> = S_MOV_B32 100
The IR for the failing shaders is attached.
--
Earthling Michel Dänzer | http://www.amd.com
Libre software enthusiast | Mesa and X developer
-------------- next part --------------
; ModuleID = 'tgsi'
; Reproducer shader from module 'tgsi'. It loads one input vector, multiplies
; the bit pattern of its first element by a 32-bit constant, derives eleven
; values from that product by adding constants, conditionally writes them with
; tbuffer stores, and finally exports them.
; NOTE(review): the multiply constant 741092396 is the same immediate that
; appears in the reported "%VGPR4<def> = S_MOV_B32 741092396" diagnostic, so
; the mul defining %22 is presumably what triggers the failure - confirm
; against the machine-code dump.
; Arguments are unnamed and therefore numbered %0..%13; %3..%9 carry the
; inreg attribute, %10..%13 do not.
define void @main([17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [6 x <16 x i8>] addrspace(2)* inreg, [16 x <16 x i8>] addrspace(2)* inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; %15: element 2 of the array behind argument %3; used as the first operand of
; every tbuffer store below.
%14 = getelementptr [6 x <16 x i8>] addrspace(2)* %3, i64 0, i32 2
%15 = load <16 x i8> addrspace(2)* %14, !tbaa !0
; %17: element 0 of the array behind argument %4; passed to vs.load.input.
%16 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i64 0, i32 0
%17 = load <16 x i8> addrspace(2)* %16, !tbaa !0
; Sum of an inreg argument (%5) and a non-inreg argument (%10), used as the
; last operand of the input load.
%18 = add i32 %5, %10
%19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %17, i32 0, i32 %18)
; Take component 0 of the loaded vector and reinterpret its bits as i32.
%20 = extractelement <4 x float> %19, i32 0
%21 = bitcast float %20 to i32
; Integer multiply by the constant that shows up in the reported S_MOV_B32.
%22 = mul i32 %21, 741092396
; %23 is the product reinterpreted as float; every value below is derived by
; casting %23 back to i32 and adding a different constant.
%23 = bitcast i32 %22 to float
%24 = bitcast float %23 to i32
%25 = add i32 404298267, %24
%26 = bitcast float %23 to i32
%27 = add i32 471670303, %26
%28 = bitcast float %23 to i32
%29 = add i32 539042339, %28
%30 = bitcast float %23 to i32
%31 = add i32 606414375, %30
; %32..%35 feed the first export and the fourth tbuffer store.
%32 = bitcast i32 %25 to float
%33 = bitcast i32 %27 to float
%34 = bitcast i32 %29 to float
%35 = bitcast i32 %31 to float
%36 = bitcast float %23 to i32
%37 = add i32 66051, %36
; %38 feeds the first tbuffer store and the second export.
%38 = bitcast i32 %37 to float
%39 = bitcast float %23 to i32
%40 = add i32 67438087, %39
%41 = bitcast float %23 to i32
%42 = add i32 134810123, %41
; %43/%44 feed the two-element tbuffer store and the second export.
%43 = bitcast i32 %40 to float
%44 = bitcast i32 %42 to float
%45 = bitcast float %23 to i32
%46 = add i32 202182159, %45
%47 = bitcast float %23 to i32
%48 = add i32 269554195, %47
%49 = bitcast float %23 to i32
%50 = add i32 336926231, %49
; %51..%53 feed the three-element tbuffer store and the third export.
%51 = bitcast i32 %46 to float
%52 = bitcast i32 %48 to float
%53 = bitcast i32 %50 to float
%54 = bitcast float %23 to i32
%55 = add i32 673786411, %54
; %56 feeds the last tbuffer store and the second export.
%56 = bitcast i32 %55 to float
; Extract the 7-bit field at bits 16..22 of argument %7 and compare the value
; returned by llvm.SI.tid against it (unsigned); only when tid is below the
; field value is the store block executed.
%57 = lshr i32 %7, 16
%58 = and i32 %57, 127
%59 = call i32 @llvm.SI.tid()
%60 = icmp ult i32 %59, %58
br i1 %60, label %if-true-block, label %endif-block
if-true-block: ; preds = %main_body
; %64 = (%8 + tid) * 44 + %9 * 4; the same %64 is passed to all five stores.
%61 = add i32 %8, %59
%62 = mul i32 %9, 4
%63 = mul i32 %61, 44
%64 = add i32 %63, %62
; Store 1: single i32 (%38).
%65 = bitcast float %38 to i32
call void @llvm.SI.tbuffer.store.i32(<16 x i8> %15, i32 %65, i32 1, i32 %64, i32 0, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
; Store 2: <2 x i32> built from %43 and %44.
%66 = bitcast float %43 to i32
%67 = bitcast float %44 to i32
%68 = insertelement <2 x i32> undef, i32 %66, i32 0
%69 = insertelement <2 x i32> %68, i32 %67, i32 1
call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> %15, <2 x i32> %69, i32 2, i32 %64, i32 0, i32 4, i32 11, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
; Store 3: <4 x i32> with only elements 0..2 set (%51, %52, %53); element 3
; stays undef.
%70 = bitcast float %51 to i32
%71 = bitcast float %52 to i32
%72 = bitcast float %53 to i32
%73 = insertelement <4 x i32> undef, i32 %70, i32 0
%74 = insertelement <4 x i32> %73, i32 %71, i32 1
%75 = insertelement <4 x i32> %74, i32 %72, i32 2
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> %15, <4 x i32> %75, i32 3, i32 %64, i32 0, i32 12, i32 13, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
; Store 4: fully populated <4 x i32> built from %32..%35.
%76 = bitcast float %32 to i32
%77 = bitcast float %33 to i32
%78 = bitcast float %34 to i32
%79 = bitcast float %35 to i32
%80 = insertelement <4 x i32> undef, i32 %76, i32 0
%81 = insertelement <4 x i32> %80, i32 %77, i32 1
%82 = insertelement <4 x i32> %81, i32 %78, i32 2
%83 = insertelement <4 x i32> %82, i32 %79, i32 3
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> %15, <4 x i32> %83, i32 4, i32 %64, i32 0, i32 24, i32 14, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
; Store 5: single i32 (%56).
%84 = bitcast float %56 to i32
call void @llvm.SI.tbuffer.store.i32(<16 x i8> %15, i32 %84, i32 1, i32 %64, i32 0, i32 40, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
br label %endif-block
endif-block: ; preds = %main_body, %if-true-block
; Four exports, reached on both paths: three carry the derived values (the
; fourth operand is 32, 33, 34), the last one (fourth operand 12, third
; operand 1) carries only zeros.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %32, float %33, float %34, float %35)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %43, float %44, float %38, float %56)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %51, float %52, float %53, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00)
ret void
}
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare i32 @llvm.SI.tid() #2
declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
declare void @llvm.SI.tbuffer.store.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = metadata !{metadata !"const", null, i32 1}
-------------- next part --------------
; ModuleID = 'radeon'
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "r600--"
; Function Attrs: nounwind
; Reproducer kernel from module 'radeon'. For each work item it computes
;   index = gx + (gy << 2) + (gz << 4)
; where g{x,y,z} = tidig.{x,y,z} + local.size.{x,y,z} * tgid.{x,y,z}, and
; stores tidig.x * 100 + tidig.y * 10 + tidig.z to out[index].
; NOTE(review): the constant 100 in %mul6 is the same immediate that appears
; in the reported "%VGPR5<def> = S_MOV_B32 100" diagnostic, so that multiply
; is presumably what triggers the failure - confirm against the machine-code
; dump.
define void @fill3d(i32 addrspace(1)* nocapture %out) #0 {
entry:
; x component: %add.i = tidig.x + local.size.x * tgid.x
%x.i.i = tail call i32 @llvm.r600.read.tgid.x() #1
%x.i12.i = tail call i32 @llvm.r600.read.local.size.x() #1
%mul26.i = mul i32 %x.i12.i, %x.i.i
%x.i4.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.i = add i32 %x.i4.i, %mul26.i
; y component: %add.i13 = tidig.y + local.size.y * tgid.y, then scaled by 4
; (shl 2) and added to the x component.
%y.i.i = tail call i32 @llvm.r600.read.tgid.y() #1
%y.i14.i = tail call i32 @llvm.r600.read.local.size.y() #1
%mul30.i = mul i32 %y.i14.i, %y.i.i
%y.i6.i = tail call i32 @llvm.r600.read.tidig.y() #1
%add.i13 = add i32 %y.i6.i, %mul30.i
%mul = shl i32 %add.i13, 2
%add = add i32 %add.i, %mul
; z component: %add.i12 = tidig.z + local.size.z * tgid.z, then scaled by 16
; (shl 4) and added in; %add4 is the final element index.
%z.i.i = tail call i32 @llvm.r600.read.tgid.z() #1
%z.i16.i = tail call i32 @llvm.r600.read.local.size.z() #1
%mul33.i = mul i32 %z.i16.i, %z.i.i
%z.i8.i = tail call i32 @llvm.r600.read.tidig.z() #1
%add.i12 = add i32 %z.i8.i, %mul33.i
%mul3 = shl i32 %add.i12, 4
%add4 = add i32 %add, %mul3
; Stored value: tidig.x * 100 + tidig.y * 10 + tidig.z. Both multiplies take
; a tidig.* result and a constant as operands.
%mul6 = mul i32 %x.i4.i, 100
%mul8 = mul i32 %y.i6.i, 10
%add9 = add i32 %mul8, %mul6
%add11 = add i32 %add9, %z.i8.i
; Sign-extend the index to i64 and store the value at out[index].
%0 = sext i32 %add4 to i64
%arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i64 %0
store i32 %add11, i32 addrspace(1)* %arrayidx, align 4, !tbaa !4
ret void
}
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tgid.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tgid.y() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tgid.z() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.local.size.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.local.size.y() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.local.size.z() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tidig.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tidig.y() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tidig.z() #1
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!opencl.kernels = !{!0, !1, !2}
!llvm.ident = !{!3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3}
!0 = metadata !{null}
!1 = metadata !{null}
!2 = metadata !{void (i32 addrspace(1)*)* @fill3d}
!3 = metadata !{metadata !"clang version 3.6.0 (http://llvm.org/git/clang.git 7a02234a7ba6e785d927d04ff19e66b1ea625e8b) (llvm/trunk 217119)"}
!4 = metadata !{metadata !5, metadata !5, i64 0}
!5 = metadata !{metadata !"int", metadata !6, i64 0}
!6 = metadata !{metadata !"omnipotent char", metadata !7, i64 0}
!7 = metadata !{metadata !"Simple C/C++ TBAA"}
More information about the llvm-commits
mailing list