[LLVMdev] Need a clue to improve the optimization of some C code
Nat!
nat at mulle-kybernetik.com
Tue Mar 3 06:06:05 PST 2015
Hi
I have some C code using inline functions that LLVM could be optimizing better.
Since I am new to this, I wonder if someone could give me a few pointers on how to approach this in LLVM.
Should I try to change the IR code somehow, so that the code generator produces better code, or should I rather go to the code generator and try to add an optimization pass?
Thanks for any feedback.
Ciao
Nat!
P.S. In case someone is interested, here is the assembler code and the IR that produced it.
The relevant portion of the x86_64 assembler code generated by LLVM with -Os:
~~~
## Excerpt of the -Os output: null check, table lookup, then an indirect call.
## Presumably %r12 = foo, %r15 = key, %r14 = value (they are passed in that
## order as the three call arguments below) -- confirm against the full listing.
## Skip the whole sequence when %r12 is zero.
testq %r12, %r12
je LBB0_5
## BB#1:
## %rcx = qword stored 8 bytes before the address in %r12.
movq -8(%r12), %rcx
## %rax = first qword of what %rcx points to.
movq (%rcx), %rax
## %rdx = qword stored 8 bytes before %rax, ANDed with %r15 to form a byte offset.
movq -8(%rax), %rdx
andq %r15, %rdx
## Is the qword at %rax + %rdx equal to %r15?
cmpq %r15, (%rax,%rdx)
je LBB0_2
## BB#3:
## Not equal: the function pointer is the qword at 8(%rcx).
addq $8, %rcx
jmp LBB0_4
LBB0_2:
## Equal: the function pointer is the qword 8 bytes past the matching slot.
leaq 8(%rdx,%rax), %rcx
LBB0_4:
## Both paths arrive here with %rcx = address of the function pointer to call.
movq %r12, %rdi
movq %r15, %rsi
movq %r14, %rdx
callq *(%rcx)
## Keep the call's result in %rbx.
movq %rax, %rbx
LBB0_5:
~~~
Better/tighter assembler code would be the following (it saves 2 instructions and has one jump less):
~~~
## Hand-written alternative: the +8 displacement is folded into the indirect
## call, so the "not equal" path needs no addq and no extra jmp.
## Skip the whole sequence when %r12 is zero.
testq %r12, %r12
je LBB0_5
## %rcx = qword stored 8 bytes before the address in %r12.
movq -8(%r12), %rcx
## %rax = first qword of what %rcx points to.
movq (%rcx), %rax
## %rdx = qword stored 8 bytes before %rax, ANDed with %r15 to form a byte offset.
movq -8(%rax), %rdx
andq %r15, %rdx
## Is the qword at %rax + %rdx equal to %r15?
cmpq %r15, (%rax,%rdx)
## Not equal: keep %rcx as loaded above and go straight to the call.
jne LBB0_4
## Equal: point %rcx at the matching slot instead.
leaq 0(%rdx,%rax), %rcx
LBB0_4:
## Either way the function pointer is the qword at 8(%rcx).
movq %r12, %rdi
movq %r15, %rsi
movq %r14, %rdx
callq *8(%rcx)
## Keep the call's result in %rbx.
movq %rax, %rbx
LBB0_5:
~~~
This is the IR code that produces the assembler code above:
~~~
; Function Attrs: inlinehint nounwind
; @call(foo, key, value): returns %foo unchanged when it is null; otherwise
; picks a function pointer (from a table entry if its first field equals %key,
; else from field 1 of the dispatch struct) and returns the result of calling
; it with (foo, key, value).
; This is unoptimized output: every argument and local lives in an alloca.
define internal %struct._foo* @call(%struct._foo* %foo, i8* %key, i8* %value) #2 {
; %1 is the return-value slot; %2, %3, %4 hold foo, key and value.
%1 = alloca %struct._foo*, align 8
%2 = alloca %struct._foo*, align 8
%3 = alloca i8*, align 8
%4 = alloca i8*, align 8
%dispatch = alloca %struct._dispatch*, align 8
%table = alloca %struct._table*, align 8
%entrys = alloca %struct._entry*, align 8
%entry = alloca %struct._entry*, align 8
%offset = alloca i64, align 8
%mask = alloca i64, align 8
%f = alloca %struct._foo* (%struct._foo*, i8*, i8*)*, align 8
store %struct._foo* %foo, %struct._foo** %2, align 8
store i8* %key, i8** %3, align 8
store i8* %value, i8** %4, align 8
; Branch on foo != null.
%5 = load %struct._foo** %2, align 8
%6 = icmp ne %struct._foo* %5, null
br i1 %6, label %9, label %7
; <label>:7 ; preds = %0
; foo is null: the return value is foo itself.
%8 = load %struct._foo** %2, align 8
store %struct._foo* %8, %struct._foo** %1
br label %50
; <label>:9 ; preds = %0
; dispatch = *_dispatch_p_from_foo(foo); the helper is defined outside this listing.
%10 = load %struct._foo** %2, align 8
%11 = call %struct._dispatch** @_dispatch_p_from_foo(%struct._foo* %10)
%12 = load %struct._dispatch** %11, align 8
store %struct._dispatch* %12, %struct._dispatch** %dispatch, align 8
; entrys = field 0 of *dispatch.
%13 = load %struct._dispatch** %dispatch, align 8
%14 = getelementptr inbounds %struct._dispatch* %13, i32 0, i32 0
%15 = load %struct._entry** %14, align 8
store %struct._entry* %15, %struct._entry** %entrys, align 8
; table = _table_from_entrys(entrys); the helper is defined outside this listing.
%16 = load %struct._entry** %entrys, align 8
%17 = call %struct._table* @_table_from_entrys(%struct._entry* %16)
store %struct._table* %17, %struct._table** %table, align 8
; mask = field 2 of *table.
%18 = load %struct._table** %table, align 8
%19 = getelementptr inbounds %struct._table* %18, i32 0, i32 2
%20 = load i64* %19, align 8
store i64 %20, i64* %mask, align 8
; offset = (i64) key & mask.
%21 = load i8** %3, align 8
%22 = ptrtoint i8* %21 to i64
%23 = load i64* %mask, align 8
%24 = and i64 %22, %23
store i64 %24, i64* %offset, align 8
; entry = entrys advanced by offset BYTES (the GEP is done on an i8*).
%25 = load i64* %offset, align 8
%26 = load %struct._entry** %entrys, align 8
%27 = bitcast %struct._entry* %26 to i8*
%28 = getelementptr inbounds i8* %27, i64 %25
%29 = bitcast i8* %28 to %struct._entry*
store %struct._entry* %29, %struct._entry** %entry, align 8
; Branch on whether field 0 of *entry equals key.
%30 = load %struct._entry** %entry, align 8
%31 = getelementptr inbounds %struct._entry* %30, i32 0, i32 0
%32 = load i8** %31, align 8
%33 = load i8** %3, align 8
%34 = icmp eq i8* %32, %33
br i1 %34, label %35, label %39
; <label>:35 ; preds = %9
; Equal: take the function pointer from field 1 of *entry.
%36 = load %struct._entry** %entry, align 8
%37 = getelementptr inbounds %struct._entry* %36, i32 0, i32 1
%38 = load %struct._foo* (%struct._foo*, i8*, i8*)** %37, align 8
br label %43
; <label>:39 ; preds = %9
; Not equal: take the function pointer from field 1 of *dispatch.
%40 = load %struct._dispatch** %dispatch, align 8
%41 = getelementptr inbounds %struct._dispatch* %40, i32 0, i32 1
%42 = load %struct._foo* (%struct._foo*, i8*, i8*)** %41, align 8
br label %43
; <label>:43 ; preds = %39, %35
; Merge the two candidates and call the chosen function with (foo, key, value).
%44 = phi %struct._foo* (%struct._foo*, i8*, i8*)* [ %38, %35 ], [ %42, %39 ]
store %struct._foo* (%struct._foo*, i8*, i8*)* %44, %struct._foo* (%struct._foo*, i8*, i8*)** %f, align 8
%45 = load %struct._foo* (%struct._foo*, i8*, i8*)** %f, align 8
%46 = load %struct._foo** %2, align 8
%47 = load i8** %3, align 8
%48 = load i8** %4, align 8
%49 = call %struct._foo* %45(%struct._foo* %46, i8* %47, i8* %48)
store %struct._foo* %49, %struct._foo** %1
br label %50
; <label>:50 ; preds = %43, %7
; Common exit: return whatever was stored in the return slot %1.
%51 = load %struct._foo** %1
ret %struct._foo* %51
}
~~~
------------------------------------------------------
When vanity and rivalry disappear, all the lines go
out of your stomach and you slow down and coast
slowly to a stop in the middle. -- DLR
More information about the llvm-dev
mailing list