[llvm] r255672 - AMDGPU/SI: Select constant loads with non-uniform addresses to MUBUF instructions

Tom Stellard via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 18 18:58:45 PST 2015


On Wed, Dec 16, 2015 at 05:16:22PM +0900, Michel Dänzer via llvm-commits wrote:
> 
> Hi Tom,
> 
> 
> On 16.12.2015 05:55, Tom Stellard via llvm-commits wrote:
> > Author: tstellar
> > Date: Tue Dec 15 14:55:55 2015
> > New Revision: 255672
> > 
> > URL: http://llvm.org/viewvc/llvm-project?rev=255672&view=rev
> > Log:
> > AMDGPU/SI: Select constant loads with non-uniform addresses to MUBUF instructions
> 
> This change broke the piglit test
> spec@arb_gpu_shader5@execution@ubo_array_indexing@fs-masked for me on
> Kaveri; see the assertion failure and backtrace below. The LLVM IR is
> attached.
> 

Hi,

This should be fixed now upstream.

-Tom

> 
> P.S. I get a different failure for the attached IR when asking llc to
> generate code for a VI GPU, but I'm not sure if the radeonsi driver
> would generate the same IR for VI.
> 
> 
> shader_runner: /home/daenzer/src/llvm-git/llvm/include/llvm/MC/MCInstrInfo.h:46: const llvm::MCInstrDesc& llvm::MCInstrInfo::get(unsigned int) const: Assertion `Opcode < NumOpcodes && "Invalid opcode!"' failed.
> 
> Program received signal SIGABRT, Aborted.
> 0x00007ffff55f6107 in __GI_raise (sig=sig at entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
> 56	../nptl/sysdeps/unix/sysv/linux/raise.c: No such file or directory.
> (gdb) bt
> #0  0x00007ffff55f6107 in __GI_raise (sig=sig at entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
> #1  0x00007ffff55f74e8 in __GI_abort () at abort.c:89
> #2  0x00007ffff55ef226 in __assert_fail_base (fmt=0x7ffff5725d08 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=assertion at entry=0x7fffeec2c7a0 "Opcode < NumOpcodes && \"Invalid opcode!\"", 
>     file=file at entry=0x7fffeec2c760 "/home/daenzer/src/llvm-git/llvm/include/llvm/MC/MCInstrInfo.h", line=line at entry=46, 
>     function=function at entry=0x7fffef5eec20 <_ZZNK4llvm11MCInstrInfo3getEjE19__PRETTY_FUNCTION__> "const llvm::MCInstrDesc& llvm::MCInstrInfo::get(unsigned int) const") at assert.c:92
> #3  0x00007ffff55ef2d2 in __GI___assert_fail (assertion=assertion at entry=0x7fffeec2c7a0 "Opcode < NumOpcodes && \"Invalid opcode!\"", file=file at entry=0x7fffeec2c760 "/home/daenzer/src/llvm-git/llvm/include/llvm/MC/MCInstrInfo.h", line=line at entry=46, 
>     function=function at entry=0x7fffef5eec20 <_ZZNK4llvm11MCInstrInfo3getEjE19__PRETTY_FUNCTION__> "const llvm::MCInstrDesc& llvm::MCInstrInfo::get(unsigned int) const") at assert.c:101
> #4  0x00007fffeebf2d63 in llvm::MCInstrInfo::get (this=<optimized out>, this=<optimized out>, Opcode=<optimized out>) at /home/daenzer/src/llvm-git/llvm/include/llvm/MC/MCInstrInfo.h:46
> #5  0x00007fffeebfdc97 in llvm::MCInstrInfo::get (this=<optimized out>, this=<optimized out>, Opcode=<optimized out>) at /home/daenzer/src/llvm-git/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp:2342
> #6  llvm::SIInstrInfo::moveSMRDToVALU (this=this at entry=0x79f2f0, MI=MI at entry=0x8a3ab0, MRI=..., Worklist=...) at /home/daenzer/src/llvm-git/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp:2312
> #7  0x00007fffeebfe960 in llvm::SIInstrInfo::moveToVALU (this=this at entry=0x79f2f0, TopInst=...) at /home/daenzer/src/llvm-git/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp:2430
> #8  0x00007fffeebe07b7 in (anonymous namespace)::SIFixSGPRCopies::runOnMachineFunction (this=<optimized out>, MF=...) at /home/daenzer/src/llvm-git/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp:269
> #9  0x00007fffee4d4b79 in llvm::FPPassManager::runOnFunction (this=0x82fff0, F=...) at /home/daenzer/src/llvm-git/llvm/lib/IR/LegacyPassManager.cpp:1537
> #10 0x00007fffee4d4f2b in llvm::FPPassManager::runOnModule (this=0x82fff0, M=...) at /home/daenzer/src/llvm-git/llvm/lib/IR/LegacyPassManager.cpp:1558
> #11 0x00007fffee4d47b4 in (anonymous namespace)::MPPassManager::runOnModule (M=..., this=<optimized out>) at /home/daenzer/src/llvm-git/llvm/lib/IR/LegacyPassManager.cpp:1614
> #12 llvm::legacy::PassManagerImpl::run (this=0x835ea0, M=...) at /home/daenzer/src/llvm-git/llvm/lib/IR/LegacyPassManager.cpp:1717
> #13 0x00007fffee4d498e in llvm::legacy::PassManager::run (this=this at entry=0x7fffffff55b0, M=...) at /home/daenzer/src/llvm-git/llvm/lib/IR/LegacyPassManager.cpp:1748
> #14 0x00007fffee5ca131 in LLVMTargetMachineEmit (T=T at entry=0x79e2f0, M=M at entry=0x831ad0, OS=..., codegen=codegen at entry=LLVMObjectFile, ErrorMessage=ErrorMessage at entry=0x7fffffff5890) at /home/daenzer/src/llvm-git/llvm/lib/Target/TargetMachineC.cpp:216
> #15 0x00007fffee5ca4bc in LLVMTargetMachineEmitToMemoryBuffer (T=T at entry=0x79e2f0, M=M at entry=0x831ad0, codegen=codegen at entry=LLVMObjectFile, ErrorMessage=ErrorMessage at entry=0x7fffffff5890, OutMemBuf=OutMemBuf at entry=0x7fffffff5898)
>     at /home/daenzer/src/llvm-git/llvm/lib/Target/TargetMachineC.cpp:240
> #16 0x00007ffff0ec68cf in radeon_llvm_compile (M=M at entry=0x831ad0, binary=binary at entry=0x7079b0, gpu_family=<optimized out>, dump_ir=dump_ir at entry=false, dump_asm=dump_asm at entry=false, tm=tm at entry=0x79e2f0)
>     at ../../../../../src/gallium/drivers/radeon/radeon_llvm_emit.c:184
> #17 0x00007ffff0e0be1e in si_compile_llvm (sscreen=sscreen at entry=0x691b70, shader=shader at entry=0x707980, tm=tm at entry=0x79e2f0, mod=0x831ad0) at ../../../../../src/gallium/drivers/radeonsi/si_shader.c:3888
> #18 0x00007ffff0e0cb40 in si_shader_create (sscreen=0x691b70, tm=0x79e2f0, shader=shader at entry=0x707980) at ../../../../../src/gallium/drivers/radeonsi/si_shader.c:4185
> #19 0x00007ffff0e1afec in si_shader_select (ctx=ctx at entry=0x63e580, state=state at entry=0x63f190) at ../../../../../src/gallium/drivers/radeonsi/si_state_shaders.c:619
> #20 0x00007ffff0e1c450 in si_update_shaders (sctx=sctx at entry=0x63e580) at ../../../../../src/gallium/drivers/radeonsi/si_state_shaders.c:1562
> #21 0x00007ffff0e173bf in si_draw_vbo (ctx=0x63e580, info=0x7fffffffdff0) at ../../../../../src/gallium/drivers/radeonsi/si_state_draw.c:788
> #22 0x00007ffff0bd33a6 in u_vbuf_draw_vbo (mgr=0x807f10, info=0x7fffffffdff0) at ../../../../src/gallium/auxiliary/util/u_vbuf.c:1162
> #23 0x00007ffff0973d9b in st_draw_vbo (ctx=0x7ab530, prims=<optimized out>, nr_prims=<optimized out>, ib=0x0, index_bounds_valid=<optimized out>, min_index=0, max_index=3, tfb_vertcount=0x0, stream=0, indirect=0x0)
>     at ../../../src/mesa/state_tracker/st_draw.c:291
> #24 0x00007ffff093498a in vbo_draw_arrays (ctx=0x7ab530, mode=5, start=0, count=4, numInstances=1, baseInstance=0) at ../../../src/mesa/vbo/vbo_exec_array.c:645
> #25 0x00007ffff7aafb3a in stub_glDrawArrays (mode=5, first=0, count=4) at tests/util/piglit-dispatch-gen.c:11733
> #26 0x00007ffff7b1a5a1 in piglit_draw_rect_from_arrays (verts=0x7fffffffe1d0, tex=0x0, use_patches=false) at tests/util/piglit-util-gl.c:782
> #27 0x00007ffff7b1a728 in piglit_draw_rect_custom (x=-1, y=-1, w=1, h=1, use_patches=false) at tests/util/piglit-util-gl.c:824
> #28 0x00007ffff7b1a774 in piglit_draw_rect (x=-1, y=-1, w=1, h=1) at tests/util/piglit-util-gl.c:833
> #29 0x000000000040b8b7 in piglit_display () at tests/shaders/shader_runner.c:2811
> #30 0x00007ffff7b3faa3 in process_next_event (x11_fw=0x625c20) at tests/util/piglit-framework-gl/piglit_x11_framework.c:137
> #31 0x00007ffff7b3fb46 in enter_event_loop (winsys_fw=0x625c20) at tests/util/piglit-framework-gl/piglit_x11_framework.c:153
> #32 0x00007ffff7b3ea2f in run_test (gl_fw=0x625c20, argc=2, argv=0x7fffffffe6f8) at tests/util/piglit-framework-gl/piglit_winsys_framework.c:88
> #33 0x00007ffff7b23557 in piglit_gl_test_run (argc=2, argv=0x7fffffffe6f8, config=0x7fffffffe5c0) at tests/util/piglit-framework-gl.c:199
> #34 0x0000000000405a81 in main (argc=2, argv=0x7fffffffe6f8) at tests/shaders/shader_runner.c:54
> 
> 
> -- 
> Earthling Michel Dänzer               |               http://www.amd.com
> Libre software enthusiast             |             Mesa and X developer

> ; ModuleID = 'tgsi'
> 
> define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
> main_body:
>   %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
>   %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0
>   %25 = fptosi float %15 to i32
>   %26 = and i32 %25, 3
>   %27 = icmp eq i32 %26, 2
>   br i1 %27, label %IF, label %ELSE
> 
> IF:                                               ; preds = %main_body
>   %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16)
>   %29 = bitcast float %28 to i32
>   %30 = add i32 %29, 1
>   %31 = sext i32 %30 to i64
>   %32 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %31
>   %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0
>   %34 = call float @llvm.SI.load.const(<16 x i8> %33, i32 0)
>   %35 = add i32 %29, 1
>   %36 = sext i32 %35 to i64
>   %37 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %36
>   %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
>   %39 = call float @llvm.SI.load.const(<16 x i8> %38, i32 4)
>   %40 = add i32 %29, 1
>   %41 = sext i32 %40 to i64
>   %42 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %41
>   %43 = load <16 x i8>, <16 x i8> addrspace(2)* %42, align 16, !tbaa !0
>   %44 = call float @llvm.SI.load.const(<16 x i8> %43, i32 8)
>   %45 = add i32 %29, 1
>   %46 = sext i32 %45 to i64
>   %47 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %46
>   %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0
>   %49 = call float @llvm.SI.load.const(<16 x i8> %48, i32 12)
>   br label %ENDIF
> 
> ELSE:                                             ; preds = %main_body
>   %50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0)
>   %51 = bitcast float %50 to i32
>   %52 = add i32 %51, 1
>   %53 = sext i32 %52 to i64
>   %54 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %53
>   %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0
>   %56 = call float @llvm.SI.load.const(<16 x i8> %55, i32 0)
>   %57 = add i32 %51, 1
>   %58 = sext i32 %57 to i64
>   %59 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %58
>   %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0
>   %61 = call float @llvm.SI.load.const(<16 x i8> %60, i32 4)
>   %62 = add i32 %51, 1
>   %63 = sext i32 %62 to i64
>   %64 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %63
>   %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0
>   %66 = call float @llvm.SI.load.const(<16 x i8> %65, i32 8)
>   %67 = add i32 %51, 1
>   %68 = sext i32 %67 to i64
>   %69 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %68
>   %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0
>   %71 = call float @llvm.SI.load.const(<16 x i8> %70, i32 12)
>   br label %ENDIF
> 
> ENDIF:                                            ; preds = %ELSE, %IF
>   %temp6.0 = phi float [ %44, %IF ], [ %66, %ELSE ]
>   %temp7.0 = phi float [ %49, %IF ], [ %71, %ELSE ]
>   %temp5.0 = phi float [ %39, %IF ], [ %61, %ELSE ]
>   %temp4.0 = phi float [ %34, %IF ], [ %56, %ELSE ]
>   %72 = call i32 @llvm.SI.packf16(float %temp4.0, float %temp5.0)
>   %73 = bitcast i32 %72 to float
>   %74 = call i32 @llvm.SI.packf16(float %temp6.0, float %temp7.0)
>   %75 = bitcast i32 %74 to float
>   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %73, float %75, float %73, float %75)
>   ret void
> }
> 
> ; Function Attrs: nounwind readnone
> declare float @llvm.SI.load.const(<16 x i8>, i32) #1
> 
> ; Function Attrs: nounwind readnone
> declare i32 @llvm.SI.packf16(float, float) #1
> 
> declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
> 
> attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
> attributes #1 = { nounwind readnone }
> 
> !0 = !{!"const", null, i32 1}

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits



More information about the llvm-commits mailing list