[llvm] r276942 - Don't invoke getName() from Function::isIntrinsic().

Thu Jul 28 00:25:34 PDT 2016

On 28.07.2016 08:46, Justin Lebar via llvm-commits wrote:
> Author: jlebar
> Date: Wed Jul 27 18:46:57 2016
> New Revision: 276942
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=276942&view=rev
> Log:
> Don't invoke getName() from Function::isIntrinsic().
> 
> Summary:
> getName() involves a hashtable lookup, so is expensive given how
> frequently isIntrinsic() is called.  (In particular, many users cast to
> IntrinsicInstr or one of its subclasses before calling
> getIntrinsicID().)
> 
> This has an incidental functional change: Before, isIntrinsic() would
> return true for any function whose name started with "llvm.", even if it
> wasn't properly an intrinsic.  The new behavior seems more correct to
> me, because it's strange to say that isIntrinsic() is true, but
> getIntrinsicId() returns "not an intrinsic".
> 
> Some callers want the old behavior -- they want to know whether the
> caller is a recognized intrinsic, or might be one in some other version
> of LLVM.  For them, we added Function::hasLLVMReservedName(), which
> checks whether the name starts with "llvm.".
> 
> This change is good for a 1.5% e2e speedup compiling a large Eigen
> benchmark.
> 
> Reviewers: bogner

This change broke the piglit test
spec@arb_arrays_of_arrays@execution@ubo@fs-mixed-const-nonconst with the
radeonsi driver; see the backtrace below and the attached LLVM IR.

Thread 4 "si_shader:1" received signal SIGABRT, Aborted.
[Switching to Thread 0x7fffebf67700 (LWP 30217)]
0x00007ffff56091c8 in __GI_raise (sig=sig at entry=6) at ../sysdeps/unix/sysv/linux/raise.c:54
54	../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0  0x00007ffff56091c8 in __GI_raise (sig=sig at entry=6) at ../sysdeps/unix/sysv/linux/raise.c:54
#1  0x00007ffff560a64a in __GI_abort () at abort.c:89
#2  0x00007ffff5602107 in __assert_fail_base (fmt=<optimized out>, assertion=assertion at entry=0x7fffeee991e0 "PhysReg && \"Invalid SubReg for physical register\"", file=file at entry=0x7fffeee9908c "../lib/CodeGen/VirtRegMap.cpp", line=line at entry=442, 
    function=function at entry=0x7fffeee99b20 <(anonymous namespace)::VirtRegRewriter::rewrite()::__PRETTY_FUNCTION__> "void {anonymous}::VirtRegRewriter::rewrite()") at assert.c:92
#3  0x00007ffff56021b2 in __GI___assert_fail (assertion=assertion at entry=0x7fffeee991e0 "PhysReg && \"Invalid SubReg for physical register\"", file=file at entry=0x7fffeee9908c "../lib/CodeGen/VirtRegMap.cpp", line=line at entry=442, 
    function=function at entry=0x7fffeee99b20 <(anonymous namespace)::VirtRegRewriter::rewrite()::__PRETTY_FUNCTION__> "void {anonymous}::VirtRegRewriter::rewrite()") at assert.c:101
#4  0x00007fffedea977a in (anonymous namespace)::VirtRegRewriter::rewrite (this=this at entry=0x7fffdc020770) at ../lib/CodeGen/VirtRegMap.cpp:442
#5  0x00007fffedeaaaab in (anonymous namespace)::VirtRegRewriter::runOnMachineFunction (this=0x7fffdc020770, fn=...) at ../lib/CodeGen/VirtRegMap.cpp:232
#6  0x00007fffedd1c84c in llvm::MachineFunctionPass::runOnFunction (this=0x7fffdc020770, F=...) at ../lib/CodeGen/MachineFunctionPass.cpp:60
#7  0x00007fffedb9bfc9 in llvm::FPPassManager::runOnFunction (this=0x7fffdc014620, F=...) at ../lib/IR/LegacyPassManager.cpp:1526
#8  0x00007fffedb9c06b in llvm::FPPassManager::runOnModule (this=0x7fffdc014620, M=...) at ../lib/IR/LegacyPassManager.cpp:1547
#9  0x00007fffedb9bc04 in (anonymous namespace)::MPPassManager::runOnModule (M=..., this=<optimized out>) at ../lib/IR/LegacyPassManager.cpp:1603
#10 llvm::legacy::PassManagerImpl::run (this=0x7fffdc00abc0, M=...) at ../lib/IR/LegacyPassManager.cpp:1706
#11 0x00007fffedb9bdde in llvm::legacy::PassManager::run (this=this at entry=0x7fffebf5ef60, M=...) at ../lib/IR/LegacyPassManager.cpp:1737
#12 0x00007fffee90e691 in LLVMTargetMachineEmit (T=T at entry=0x69c5f0, M=M at entry=0x7fffdc002410, OS=..., codegen=codegen at entry=LLVMObjectFile, ErrorMessage=ErrorMessage at entry=0x7fffebf5f200) at ../lib/Target/TargetMachineC.cpp:205
#13 0x00007fffee90ea1c in LLVMTargetMachineEmitToMemoryBuffer (T=T at entry=0x69c5f0, M=M at entry=0x7fffdc002410, codegen=codegen at entry=LLVMObjectFile, ErrorMessage=ErrorMessage at entry=0x7fffebf5f200, OutMemBuf=OutMemBuf at entry=0x7fffebf5f208)
    at ../lib/Target/TargetMachineC.cpp:229
#14 0x00007ffff0edf326 in radeon_llvm_compile (M=M at entry=0x7fffdc002410, binary=binary at entry=0x7fffdc000950, tm=tm at entry=0x69c5f0, debug=debug at entry=0x0) at ../../../../../src/gallium/drivers/radeon/radeon_llvm_emit.c:215
#15 0x00007ffff0e13b6f in si_compile_llvm (sscreen=sscreen at entry=0x696850, binary=binary at entry=0x7fffdc000950, conf=conf at entry=0x7fffdc0009b0, tm=tm at entry=0x69c5f0, mod=mod at entry=0x7fffdc002410, debug=debug at entry=0x0, processor=1, 
    name=0x7ffff1001aaf "TGSI shader") at ../../../../../src/gallium/drivers/radeonsi/si_shader.c:6312
#16 0x00007ffff0e17272 in si_compile_tgsi_shader (sscreen=sscreen at entry=0x696850, tm=tm at entry=0x69c5f0, shader=shader at entry=0x7fffdc0008c0, is_monolithic=is_monolithic at entry=false, debug=debug at entry=0x0)
    at ../../../../../src/gallium/drivers/radeonsi/si_shader.c:6719
#17 0x00007ffff0e27bed in si_init_shader_selector_async (job=job at entry=0x81d910, thread_index=thread_index at entry=1) at ../../../../../src/gallium/drivers/radeonsi/si_state_shaders.c:1160

-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
-------------- next part --------------
; Module header of the attached reproducer.
; The triple names the amdgcn architecture and leaves the vendor and OS
; components empty.
source_filename = "tgsi"
target triple = "amdgcn--"

; Reproducer entry point, declared with the amdgpu_ps calling convention.
;
; All 23 parameters are unnamed, so they are implicitly numbered %0..%22 and
; the first instruction result is %23. Only three parameters are read:
;   %1  - pointer (addrspace 2) to an array of 16 <16 x i8> elements
;   %5  - the first `float inreg` parameter
;   %21 - the last `float` parameter
;
; The function returns a packed struct of 11 i32 fields followed by 14 float
; fields. Fields 10, 11-14 and 24 are written; every other field stays undef.
;
; NOTE(review): the index/load sequence is repeated four times with identical
; operands. Keep it as-is -- this is a crash reproducer and simplifying it may
; hide the failure.
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
  ; Load element 0 of the array behind %1 and read two values from it through
  ; llvm.SI.load.const, with second operands 0 and 16 (presumably byte
  ; offsets -- confirm against the intrinsic's definition).
  %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !invariant.load !0
  %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0)
  %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16)
  ; %29 = (bits(%25) << 1) | 1 is the base of the dynamic array index;
  ; %30 = bits(%26) is the base of the dynamic second operand used below.
  %27 = bitcast float %25 to i32
  %28 = shl i32 %27, 1
  %29 = or i32 %28, 1
  %30 = bitcast float %26 to i32
  ; Component 0: index = (%29 + 1) & 14, second operand = %30 << 4.
  %31 = add i32 %29, 1
  %32 = and i32 %31, 14
  %33 = zext i32 %32 to i64
  %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %33, !amdgpu.uniform !0
  %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !invariant.load !0
  %36 = shl i32 %30, 4
  %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 %36)
  ; Component 1: same index computation, second operand = (%30 << 4) | 4.
  %38 = add i32 %29, 1
  %39 = and i32 %38, 14
  %40 = zext i32 %39 to i64
  %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %40, !amdgpu.uniform !0
  %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !invariant.load !0
  %43 = shl i32 %30, 4
  %44 = or i32 %43, 4
  %45 = call float @llvm.SI.load.const(<16 x i8> %42, i32 %44)
  ; Component 2: same index computation, second operand = (%30 << 4) | 8.
  %46 = add i32 %29, 1
  %47 = and i32 %46, 14
  %48 = zext i32 %47 to i64
  %49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %48, !amdgpu.uniform !0
  %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !invariant.load !0
  %51 = shl i32 %30, 4
  %52 = or i32 %51, 8
  %53 = call float @llvm.SI.load.const(<16 x i8> %50, i32 %52)
  ; Component 3: same index computation, second operand = (%30 << 4) | 12.
  %54 = add i32 %29, 1
  %55 = and i32 %54, 14
  %56 = zext i32 %55 to i64
  %57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 %56, !amdgpu.uniform !0
  %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !invariant.load !0
  %59 = shl i32 %30, 4
  %60 = or i32 %59, 12
  %61 = call float @llvm.SI.load.const(<16 x i8> %58, i32 %60)
  ; Build the return value: field 10 gets the bits of parameter %5, fields
  ; 11-14 get the four loaded components, and field 24 gets parameter %21.
  %62 = bitcast float %5 to i32
  %63 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %62, 10
  %64 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %63, float %37, 11
  %65 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %64, float %45, 12
  %66 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %65, float %53, 13
  %67 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %66, float %61, 14
  %68 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %67, float %21, 24
  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %68
}

; Declaration of the only callee used by @main. It takes a <16 x i8> value and
; an i32 and returns a float. Its name carries the reserved "llvm." prefix
; discussed in the quoted commit log.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Attribute group #0 is attached to @main; #1 is attached to the declaration
; above.
; NOTE(review): the meaning of the "InitialPSInputAddr" value is defined by
; the consumer of this string attribute, not by this file.
attributes #0 = { "InitialPSInputAddr"="36983" }
attributes #1 = { nounwind readnone }

; Empty metadata node, used as the operand of the !amdgpu.uniform and
; !invariant.load attachments in @main.
!0 = !{}