[llvm] r255672 - AMDGPU/SI: Select constant loads with non-uniform addresses to MUBUF instructions
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 18 18:58:45 PST 2015
On Wed, Dec 16, 2015 at 05:16:22PM +0900, Michel Dänzer via llvm-commits wrote:
>
> Hi Tom,
>
>
> On 16.12.2015 05:55, Tom Stellard via llvm-commits wrote:
> > Author: tstellar
> > Date: Tue Dec 15 14:55:55 2015
> > New Revision: 255672
> >
> > URL: http://llvm.org/viewvc/llvm-project?rev=255672&view=rev
> > Log:
> > AMDGPU/SI: Select constant loads with non-uniform addresses to MUBUF instructions
>
> This change broke the piglit test
> spec@arb_gpu_shader5@execution@ubo_array_indexing@fs-masked for me on
> Kaveri, see the assertion failure and backtrace below. The LLVM IR is
> attached.
>
Hi,
This should be fixed now upstream.
-Tom
>
> P.S. I get a different failure for the attached IR when asking llc to
> generate code for a VI GPU, but I'm not sure if the radeonsi driver
> would generate the same IR for VI.
>
>
> shader_runner: /home/daenzer/src/llvm-git/llvm/include/llvm/MC/MCInstrInfo.h:46: const llvm::MCInstrDesc& llvm::MCInstrInfo::get(unsigned int) const: Assertion `Opcode < NumOpcodes && "Invalid opcode!"' failed.
>
> Program received signal SIGABRT, Aborted.
> 0x00007ffff55f6107 in __GI_raise (sig=sig at entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
> 56 ../nptl/sysdeps/unix/sysv/linux/raise.c: No such file or directory.
> (gdb) bt
> #0 0x00007ffff55f6107 in __GI_raise (sig=sig at entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
> #1 0x00007ffff55f74e8 in __GI_abort () at abort.c:89
> #2 0x00007ffff55ef226 in __assert_fail_base (fmt=0x7ffff5725d08 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=assertion at entry=0x7fffeec2c7a0 "Opcode < NumOpcodes && \"Invalid opcode!\"",
> file=file at entry=0x7fffeec2c760 "/home/daenzer/src/llvm-git/llvm/include/llvm/MC/MCInstrInfo.h", line=line at entry=46,
> function=function at entry=0x7fffef5eec20 <_ZZNK4llvm11MCInstrInfo3getEjE19__PRETTY_FUNCTION__> "const llvm::MCInstrDesc& llvm::MCInstrInfo::get(unsigned int) const") at assert.c:92
> #3 0x00007ffff55ef2d2 in __GI___assert_fail (assertion=assertion at entry=0x7fffeec2c7a0 "Opcode < NumOpcodes && \"Invalid opcode!\"", file=file at entry=0x7fffeec2c760 "/home/daenzer/src/llvm-git/llvm/include/llvm/MC/MCInstrInfo.h", line=line at entry=46,
> function=function at entry=0x7fffef5eec20 <_ZZNK4llvm11MCInstrInfo3getEjE19__PRETTY_FUNCTION__> "const llvm::MCInstrDesc& llvm::MCInstrInfo::get(unsigned int) const") at assert.c:101
> #4 0x00007fffeebf2d63 in llvm::MCInstrInfo::get (this=<optimized out>, this=<optimized out>, Opcode=<optimized out>) at /home/daenzer/src/llvm-git/llvm/include/llvm/MC/MCInstrInfo.h:46
> #5 0x00007fffeebfdc97 in llvm::MCInstrInfo::get (this=<optimized out>, this=<optimized out>, Opcode=<optimized out>) at /home/daenzer/src/llvm-git/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp:2342
> #6 llvm::SIInstrInfo::moveSMRDToVALU (this=this at entry=0x79f2f0, MI=MI at entry=0x8a3ab0, MRI=..., Worklist=...) at /home/daenzer/src/llvm-git/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp:2312
> #7 0x00007fffeebfe960 in llvm::SIInstrInfo::moveToVALU (this=this at entry=0x79f2f0, TopInst=...) at /home/daenzer/src/llvm-git/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp:2430
> #8 0x00007fffeebe07b7 in (anonymous namespace)::SIFixSGPRCopies::runOnMachineFunction (this=<optimized out>, MF=...) at /home/daenzer/src/llvm-git/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp:269
> #9 0x00007fffee4d4b79 in llvm::FPPassManager::runOnFunction (this=0x82fff0, F=...) at /home/daenzer/src/llvm-git/llvm/lib/IR/LegacyPassManager.cpp:1537
> #10 0x00007fffee4d4f2b in llvm::FPPassManager::runOnModule (this=0x82fff0, M=...) at /home/daenzer/src/llvm-git/llvm/lib/IR/LegacyPassManager.cpp:1558
> #11 0x00007fffee4d47b4 in (anonymous namespace)::MPPassManager::runOnModule (M=..., this=<optimized out>) at /home/daenzer/src/llvm-git/llvm/lib/IR/LegacyPassManager.cpp:1614
> #12 llvm::legacy::PassManagerImpl::run (this=0x835ea0, M=...) at /home/daenzer/src/llvm-git/llvm/lib/IR/LegacyPassManager.cpp:1717
> #13 0x00007fffee4d498e in llvm::legacy::PassManager::run (this=this at entry=0x7fffffff55b0, M=...) at /home/daenzer/src/llvm-git/llvm/lib/IR/LegacyPassManager.cpp:1748
> #14 0x00007fffee5ca131 in LLVMTargetMachineEmit (T=T at entry=0x79e2f0, M=M at entry=0x831ad0, OS=..., codegen=codegen at entry=LLVMObjectFile, ErrorMessage=ErrorMessage at entry=0x7fffffff5890) at /home/daenzer/src/llvm-git/llvm/lib/Target/TargetMachineC.cpp:216
> #15 0x00007fffee5ca4bc in LLVMTargetMachineEmitToMemoryBuffer (T=T at entry=0x79e2f0, M=M at entry=0x831ad0, codegen=codegen at entry=LLVMObjectFile, ErrorMessage=ErrorMessage at entry=0x7fffffff5890, OutMemBuf=OutMemBuf at entry=0x7fffffff5898)
> at /home/daenzer/src/llvm-git/llvm/lib/Target/TargetMachineC.cpp:240
> #16 0x00007ffff0ec68cf in radeon_llvm_compile (M=M at entry=0x831ad0, binary=binary at entry=0x7079b0, gpu_family=<optimized out>, dump_ir=dump_ir at entry=false, dump_asm=dump_asm at entry=false, tm=tm at entry=0x79e2f0)
> at ../../../../../src/gallium/drivers/radeon/radeon_llvm_emit.c:184
> #17 0x00007ffff0e0be1e in si_compile_llvm (sscreen=sscreen at entry=0x691b70, shader=shader at entry=0x707980, tm=tm at entry=0x79e2f0, mod=0x831ad0) at ../../../../../src/gallium/drivers/radeonsi/si_shader.c:3888
> #18 0x00007ffff0e0cb40 in si_shader_create (sscreen=0x691b70, tm=0x79e2f0, shader=shader at entry=0x707980) at ../../../../../src/gallium/drivers/radeonsi/si_shader.c:4185
> #19 0x00007ffff0e1afec in si_shader_select (ctx=ctx at entry=0x63e580, state=state at entry=0x63f190) at ../../../../../src/gallium/drivers/radeonsi/si_state_shaders.c:619
> #20 0x00007ffff0e1c450 in si_update_shaders (sctx=sctx at entry=0x63e580) at ../../../../../src/gallium/drivers/radeonsi/si_state_shaders.c:1562
> #21 0x00007ffff0e173bf in si_draw_vbo (ctx=0x63e580, info=0x7fffffffdff0) at ../../../../../src/gallium/drivers/radeonsi/si_state_draw.c:788
> #22 0x00007ffff0bd33a6 in u_vbuf_draw_vbo (mgr=0x807f10, info=0x7fffffffdff0) at ../../../../src/gallium/auxiliary/util/u_vbuf.c:1162
> #23 0x00007ffff0973d9b in st_draw_vbo (ctx=0x7ab530, prims=<optimized out>, nr_prims=<optimized out>, ib=0x0, index_bounds_valid=<optimized out>, min_index=0, max_index=3, tfb_vertcount=0x0, stream=0, indirect=0x0)
> at ../../../src/mesa/state_tracker/st_draw.c:291
> #24 0x00007ffff093498a in vbo_draw_arrays (ctx=0x7ab530, mode=5, start=0, count=4, numInstances=1, baseInstance=0) at ../../../src/mesa/vbo/vbo_exec_array.c:645
> #25 0x00007ffff7aafb3a in stub_glDrawArrays (mode=5, first=0, count=4) at tests/util/piglit-dispatch-gen.c:11733
> #26 0x00007ffff7b1a5a1 in piglit_draw_rect_from_arrays (verts=0x7fffffffe1d0, tex=0x0, use_patches=false) at tests/util/piglit-util-gl.c:782
> #27 0x00007ffff7b1a728 in piglit_draw_rect_custom (x=-1, y=-1, w=1, h=1, use_patches=false) at tests/util/piglit-util-gl.c:824
> #28 0x00007ffff7b1a774 in piglit_draw_rect (x=-1, y=-1, w=1, h=1) at tests/util/piglit-util-gl.c:833
> #29 0x000000000040b8b7 in piglit_display () at tests/shaders/shader_runner.c:2811
> #30 0x00007ffff7b3faa3 in process_next_event (x11_fw=0x625c20) at tests/util/piglit-framework-gl/piglit_x11_framework.c:137
> #31 0x00007ffff7b3fb46 in enter_event_loop (winsys_fw=0x625c20) at tests/util/piglit-framework-gl/piglit_x11_framework.c:153
> #32 0x00007ffff7b3ea2f in run_test (gl_fw=0x625c20, argc=2, argv=0x7fffffffe6f8) at tests/util/piglit-framework-gl/piglit_winsys_framework.c:88
> #33 0x00007ffff7b23557 in piglit_gl_test_run (argc=2, argv=0x7fffffffe6f8, config=0x7fffffffe5c0) at tests/util/piglit-framework-gl.c:199
> #34 0x0000000000405a81 in main (argc=2, argv=0x7fffffffe6f8) at tests/shaders/shader_runner.c:54
>
>
> --
> Earthling Michel Dänzer | http://www.amd.com
> Libre software enthusiast | Mesa and X developer
> ; ModuleID = 'tgsi'
>
> define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
> main_body:
> %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
> %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0
> %25 = fptosi float %15 to i32
> %26 = and i32 %25, 3
> %27 = icmp eq i32 %26, 2
> br i1 %27, label %IF, label %ELSE
>
> IF: ; preds = %main_body
> %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16)
> %29 = bitcast float %28 to i32
> %30 = add i32 %29, 1
> %31 = sext i32 %30 to i64
> %32 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %31
> %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0
> %34 = call float @llvm.SI.load.const(<16 x i8> %33, i32 0)
> %35 = add i32 %29, 1
> %36 = sext i32 %35 to i64
> %37 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %36
> %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
> %39 = call float @llvm.SI.load.const(<16 x i8> %38, i32 4)
> %40 = add i32 %29, 1
> %41 = sext i32 %40 to i64
> %42 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %41
> %43 = load <16 x i8>, <16 x i8> addrspace(2)* %42, align 16, !tbaa !0
> %44 = call float @llvm.SI.load.const(<16 x i8> %43, i32 8)
> %45 = add i32 %29, 1
> %46 = sext i32 %45 to i64
> %47 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %46
> %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0
> %49 = call float @llvm.SI.load.const(<16 x i8> %48, i32 12)
> br label %ENDIF
>
> ELSE: ; preds = %main_body
> %50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0)
> %51 = bitcast float %50 to i32
> %52 = add i32 %51, 1
> %53 = sext i32 %52 to i64
> %54 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %53
> %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0
> %56 = call float @llvm.SI.load.const(<16 x i8> %55, i32 0)
> %57 = add i32 %51, 1
> %58 = sext i32 %57 to i64
> %59 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %58
> %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0
> %61 = call float @llvm.SI.load.const(<16 x i8> %60, i32 4)
> %62 = add i32 %51, 1
> %63 = sext i32 %62 to i64
> %64 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %63
> %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0
> %66 = call float @llvm.SI.load.const(<16 x i8> %65, i32 8)
> %67 = add i32 %51, 1
> %68 = sext i32 %67 to i64
> %69 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %68
> %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0
> %71 = call float @llvm.SI.load.const(<16 x i8> %70, i32 12)
> br label %ENDIF
>
> ENDIF: ; preds = %ELSE, %IF
> %temp6.0 = phi float [ %44, %IF ], [ %66, %ELSE ]
> %temp7.0 = phi float [ %49, %IF ], [ %71, %ELSE ]
> %temp5.0 = phi float [ %39, %IF ], [ %61, %ELSE ]
> %temp4.0 = phi float [ %34, %IF ], [ %56, %ELSE ]
> %72 = call i32 @llvm.SI.packf16(float %temp4.0, float %temp5.0)
> %73 = bitcast i32 %72 to float
> %74 = call i32 @llvm.SI.packf16(float %temp6.0, float %temp7.0)
> %75 = bitcast i32 %74 to float
> call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %73, float %75, float %73, float %75)
> ret void
> }
>
> ; Function Attrs: nounwind readnone
> declare float @llvm.SI.load.const(<16 x i8>, i32) #1
>
> ; Function Attrs: nounwind readnone
> declare i32 @llvm.SI.packf16(float, float) #1
>
> declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
>
> attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
> attributes #1 = { nounwind readnone }
>
> !0 = !{!"const", null, i32 1}
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list