[llvm] r276942 - Don't invoke getName() from Function::isIntrinsic().
Michel Dänzer via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 28 00:25:34 PDT 2016
On 28.07.2016 08:46, Justin Lebar via llvm-commits wrote:
> Author: jlebar
> Date: Wed Jul 27 18:46:57 2016
> New Revision: 276942
>
> URL: http://llvm.org/viewvc/llvm-project?rev=276942&view=rev
> Log:
> Don't invoke getName() from Function::isIntrinsic().
>
> Summary:
> getName() involves a hashtable lookup, so is expensive given how
> frequently isIntrinsic() is called. (In particular, many users cast to
> IntrinsicInstr or one of its subclasses before calling
> getIntrinsicID().)
>
> This has an incidental functional change: Before, isIntrinsic() would
> return true for any function whose name started with "llvm.", even if it
> wasn't properly an intrinsic. The new behavior seems more correct to
> me, because it's strange to say that isIntrinsic() is true, but
> getIntrinsicID() returns "not an intrinsic".
>
> Some callers want the old behavior -- they want to know whether the
> caller is a recognized intrinsic, or might be one in some other version
> of LLVM. For them, we added Function::hasLLVMReservedName(), which
> checks whether the name starts with "llvm.".
>
> This change is good for a 1.5% e2e speedup compiling a large Eigen
> benchmark.
>
> Reviewers: bogner
This change broke the piglit test
spec@arb_arrays_of_arrays@execution@ubo@fs-mixed-const-nonconst with the
radeonsi driver, see the backtrace below and the attached LLVM IR.
Thread 4 "si_shader:1" received signal SIGABRT, Aborted.
[Switching to Thread 0x7fffebf67700 (LWP 30217)]
0x00007ffff56091c8 in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:54
54 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0 0x00007ffff56091c8 in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:54
#1 0x00007ffff560a64a in __GI_abort () at abort.c:89
#2 0x00007ffff5602107 in __assert_fail_base (fmt=<optimized out>, assertion=assertion@entry=0x7fffeee991e0 "PhysReg && \"Invalid SubReg for physical register\"", file=file@entry=0x7fffeee9908c "../lib/CodeGen/VirtRegMap.cpp", line=line@entry=442,
function=function@entry=0x7fffeee99b20 <(anonymous namespace)::VirtRegRewriter::rewrite()::__PRETTY_FUNCTION__> "void {anonymous}::VirtRegRewriter::rewrite()") at assert.c:92
#3 0x00007ffff56021b2 in __GI___assert_fail (assertion=assertion@entry=0x7fffeee991e0 "PhysReg && \"Invalid SubReg for physical register\"", file=file@entry=0x7fffeee9908c "../lib/CodeGen/VirtRegMap.cpp", line=line@entry=442,
function=function@entry=0x7fffeee99b20 <(anonymous namespace)::VirtRegRewriter::rewrite()::__PRETTY_FUNCTION__> "void {anonymous}::VirtRegRewriter::rewrite()") at assert.c:101
#4 0x00007fffedea977a in (anonymous namespace)::VirtRegRewriter::rewrite (this=this@entry=0x7fffdc020770) at ../lib/CodeGen/VirtRegMap.cpp:442
#5 0x00007fffedeaaaab in (anonymous namespace)::VirtRegRewriter::runOnMachineFunction (this=0x7fffdc020770, fn=...) at ../lib/CodeGen/VirtRegMap.cpp:232
#6 0x00007fffedd1c84c in llvm::MachineFunctionPass::runOnFunction (this=0x7fffdc020770, F=...) at ../lib/CodeGen/MachineFunctionPass.cpp:60
#7 0x00007fffedb9bfc9 in llvm::FPPassManager::runOnFunction (this=0x7fffdc014620, F=...) at ../lib/IR/LegacyPassManager.cpp:1526
#8 0x00007fffedb9c06b in llvm::FPPassManager::runOnModule (this=0x7fffdc014620, M=...) at ../lib/IR/LegacyPassManager.cpp:1547
#9 0x00007fffedb9bc04 in (anonymous namespace)::MPPassManager::runOnModule (M=..., this=<optimized out>) at ../lib/IR/LegacyPassManager.cpp:1603
#10 llvm::legacy::PassManagerImpl::run (this=0x7fffdc00abc0, M=...) at ../lib/IR/LegacyPassManager.cpp:1706
#11 0x00007fffedb9bdde in llvm::legacy::PassManager::run (this=this@entry=0x7fffebf5ef60, M=...) at ../lib/IR/LegacyPassManager.cpp:1737
#12 0x00007fffee90e691 in LLVMTargetMachineEmit (T=T@entry=0x69c5f0, M=M@entry=0x7fffdc002410, OS=..., codegen=codegen@entry=LLVMObjectFile, ErrorMessage=ErrorMessage@entry=0x7fffebf5f200) at ../lib/Target/TargetMachineC.cpp:205
#13 0x00007fffee90ea1c in LLVMTargetMachineEmitToMemoryBuffer (T=T@entry=0x69c5f0, M=M@entry=0x7fffdc002410, codegen=codegen@entry=LLVMObjectFile, ErrorMessage=ErrorMessage@entry=0x7fffebf5f200, OutMemBuf=OutMemBuf@entry=0x7fffebf5f208)
at ../lib/Target/TargetMachineC.cpp:229
#14 0x00007ffff0edf326 in radeon_llvm_compile (M=M@entry=0x7fffdc002410, binary=binary@entry=0x7fffdc000950, tm=tm@entry=0x69c5f0, debug=debug@entry=0x0) at ../../../../../src/gallium/drivers/radeon/radeon_llvm_emit.c:215
#15 0x00007ffff0e13b6f in si_compile_llvm (sscreen=sscreen@entry=0x696850, binary=binary@entry=0x7fffdc000950, conf=conf@entry=0x7fffdc0009b0, tm=tm@entry=0x69c5f0, mod=mod@entry=0x7fffdc002410, debug=debug@entry=0x0, processor=1,
name=0x7ffff1001aaf "TGSI shader") at ../../../../../src/gallium/drivers/radeonsi/si_shader.c:6312
#16 0x00007ffff0e17272 in si_compile_tgsi_shader (sscreen=sscreen@entry=0x696850, tm=tm@entry=0x69c5f0, shader=shader@entry=0x7fffdc0008c0, is_monolithic=is_monolithic@entry=false, debug=debug@entry=0x0)
at ../../../../../src/gallium/drivers/radeonsi/si_shader.c:6719
#17 0x00007ffff0e27bed in si_init_shader_selector_async (job=job@entry=0x81d910, thread_index=thread_index@entry=1) at ../../../../../src/gallium/drivers/radeonsi/si_state_shaders.c:1160
--
Earthling Michel Dänzer | http://www.amd.com
Libre software enthusiast | Mesa and X developer
-------------- next part --------------
source_filename = "tgsi"
target triple = "amdgcn--"
; @main: function using the amdgpu_ps calling convention. It reads two values
; via @llvm.SI.load.const from element 0 of the array behind argument %1, derives
; an array index and a base offset from them, performs four further
; @llvm.SI.load.const reads through the indexed element, and returns the results
; packed into a struct together with arguments %5 and %21.
; Arguments are unnamed, so they are numbered %0..%22; the first instruction
; result is therefore %23.
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
; Load element 0 of the [16 x <16 x i8>] array pointed to by argument %1.
%23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
%24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !invariant.load !0
; Two reads through that element, with second operands 0 and 16.
%25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0)
%26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16)
; %29 = (bits of %25 << 1) | 1 -- feeds the array index computed below.
%27 = bitcast float %25 to i32
%28 = shl i32 %27, 1
%29 = or i32 %28, 1
; %30 = bits of %26 -- shifted left by 4 below to form the base offset.
%30 = bitcast float %26 to i32
; First read: index = (%29 + 1) & 14 into the array behind %1,
; second operand = %30 << 4.
%31 = add i32 %29, 1
%32 = and i32 %31, 14
%33 = zext i32 %32 to i64
%34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %33, !amdgpu.uniform !0
%35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !invariant.load !0
%36 = shl i32 %30, 4
%37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %36)
; Second read: the same index computation is repeated verbatim;
; second operand = (%30 << 4) | 4.
%38 = add i32 %29, 1
%39 = and i32 %38, 14
%40 = zext i32 %39 to i64
%41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %40, !amdgpu.uniform !0
%42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !invariant.load !0
%43 = shl i32 %30, 4
%44 = or i32 %43, 4
%45 = call float @llvm.SI.load.const(<16 x i8> %42, i32 %44)
; Third read: same index again; second operand = (%30 << 4) | 8.
%46 = add i32 %29, 1
%47 = and i32 %46, 14
%48 = zext i32 %47 to i64
%49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %48, !amdgpu.uniform !0
%50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !invariant.load !0
%51 = shl i32 %30, 4
%52 = or i32 %51, 8
%53 = call float @llvm.SI.load.const(<16 x i8> %50, i32 %52)
; Fourth read: same index again; second operand = (%30 << 4) | 12.
%54 = add i32 %29, 1
%55 = and i32 %54, 14
%56 = zext i32 %55 to i64
%57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %56, !amdgpu.uniform !0
%58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !invariant.load !0
%59 = shl i32 %30, 4
%60 = or i32 %59, 12
%61 = call float @llvm.SI.load.const(<16 x i8> %58, i32 %60)
; Build the return struct starting from undef: the bits of argument %5 go in
; field 10, the four read results in fields 11-14, and argument %21 in field 24.
; All other fields remain undef.
%62 = bitcast float %5 to i32
%63 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %62, 10
%64 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %63, float %37, 11
%65 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %64, float %45, 12
%66 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %65, float %53, 13
%67 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %66, float %61, 14
%68 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %67, float %21, 24
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %68
}
; External declaration (no body in this module) of the function called six
; times from @main.
; NOTE(review): the name starts with "llvm."; whether this LLVM revision
; recognizes it as an intrinsic cannot be determined from this file -- confirm.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Attribute group #0 is attached to @main.
attributes #0 = { "InitialPSInputAddr"="36983" }
; Attribute group #1 is attached to the @llvm.SI.load.const declaration.
attributes #1 = { nounwind readnone }
; Empty metadata node referenced by the !amdgpu.uniform and !invariant.load
; attachments in @main.
!0 = !{}
More information about the llvm-commits
mailing list