[LLVMdev] Need a clue to improve the optimization of some C code
Nat!
nat at mulle-kybernetik.com
Tue Mar 3 11:20:38 PST 2015
On 03.03.2015, at 19:49, Philip Reames <listmail at philipreames.com> wrote:
Hi Philip,
first of all, thanks for your response.
> You'll need to provide a bit more information to get any useful response. Questions:
> 1) What's your use case? Are you using clang to compile C code? Are you manually generating LLVM IR?
Yes, the "inline function C code" will be compiled by clang.
> 2) Are you tracking ToT? If you're not, you should be.
Currently the git repo doesn't build for me (libLLVMR600CodeGen fails). Admittedly, my IR output is from an older version, 3.6.
>
> Depending on your use case, you might find this document useful:
> http://llvm.org/docs/Frontend/PerformanceTips.html
Maybe, if rewriting the IR is actually the sensible route; I am still wondering whether that is a good/useful idea. I don't know.
I basically need to get LLVM to emit
~~~
cmpq %r15, (%rax,%rdx)
jne LBB0_4
leaq 0(%rdx,%rax), %rcx
LBB0_4:
callq *8(%rcx)
~~~
instead of
~~~
cmpq %r15, (%rax,%rdx)
je LBB0_2
addq $8, %rcx
jmp LBB0_4
LBB0_2:
leaq 8(%rdx,%rax), %rcx
LBB0_4:
callq *(%rcx)
~~~
If I can do this by rewriting the IR, that would be nice, because it would hopefully translate to other architectures as well, and I would dig into that. If the general approach is rather to keep the IR as it is and tweak the code generation instead, I'll try that first.
I have only a coarse understanding of how LLVM's optimizations work, so it would be nice if I could avoid some common dead ends right from the start.
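For what it's worth, here is a minimal C-level sketch of the shape that I think would map to the better code. The struct and field names are my guesses from the IR below, and the preceding dispatch/entry lookup is omitted just to keep it short; the point is only the transformation itself: instead of loading a function pointer in each branch, select just the *base* pointer in the branch and load once at the shared offset, which is exactly what `callq *8(%rcx)` does.
~~~
#include <stddef.h>

struct _foo;
typedef struct _foo   *(*imp_t)( struct _foo *, void *, void *);

/* layouts guessed from the IR below: both structs keep the
   function pointer in their second word (offset 8 on x86_64) */
struct _entry
{
   void    *key;
   imp_t   f;
};

struct _dispatch
{
   struct _entry   *entrys;
   imp_t           fallback;
};

/* hypothetical: dispatch and entry are passed in here, the hash
   lookup that produces them is left out of the sketch */
static inline struct _foo   *call_sketch( struct _dispatch *dispatch,
                                          struct _entry *entry,
                                          struct _foo *foo,
                                          void *key,
                                          void *value)
{
   void    *base;
   imp_t   f;

   /* pick only the base pointer in the branch ... */
   base = (entry->key == key) ? (void *) entry : (void *) dispatch;

   /* ... and do a single load at the shared offset, so the backend
      only has one address computation to fold into the call */
   f = *(imp_t *) ((char *) base + offsetof( struct _entry, f));

   return( (*f)( foo, key, value));
}
~~~
Whether clang/LLVM actually turns this into the shorter sequence is of course exactly my question; the sketch just illustrates the transformation I would like LLVM to perform on its own.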
Ciao
Nat!
>
> On 03/03/2015 06:06 AM, Nat! wrote:
>> Hi
>>
>> I have some inline-function C code that LLVM could be optimizing better.
>> Since I am new to this, I wonder if someone could give me a few pointers on how to approach this in LLVM.
>>
>> Should I try to change the IR code somehow to get the code generator to emit better code, or should I rather go into the code generator and try to add an optimization pass?
>>
>> Thanks for any feedback.
>>
>> Ciao
>> Nat!
>>
>>
>> P.S. In case someone is interested, here is the assembler code and the IR that produced it.
>>
>>
>>
>> Relevant LLVM-generated x86_64 assembler portion with -Os:
>> ~~~
>> testq %r12, %r12
>> je LBB0_5
>> ## BB#1:
>> movq -8(%r12), %rcx
>> movq (%rcx), %rax
>> movq -8(%rax), %rdx
>> andq %r15, %rdx
>> cmpq %r15, (%rax,%rdx)
>> je LBB0_2
>> ## BB#3:
>> addq $8, %rcx
>> jmp LBB0_4
>> LBB0_2:
>> leaq 8(%rdx,%rax), %rcx
>> LBB0_4:
>> movq %r12, %rdi
>> movq %r15, %rsi
>> movq %r14, %rdx
>> callq *(%rcx)
>> movq %rax, %rbx
>> LBB0_5:
>> ~~~
>>
>> Better/tighter assembler code would be (saves two instructions, one jump fewer):
>> ~~~
>> testq %r12, %r12
>> je LBB0_5
>>
>> movq -8(%r12), %rcx
>> movq (%rcx), %rax
>> movq -8(%rax), %rdx
>> andq %r15, %rdx
>> cmpq %r15, (%rax,%rdx)
>> jne LBB0_4
>>
>> leaq 0(%rdx,%rax), %rcx
>> LBB0_4:
>> movq %r12, %rdi
>> movq %r15, %rsi
>> movq %r14, %rdx
>> callq *8(%rcx)
>> movq %rax, %rbx
>> LBB0_5:
>> ~~~
>>
>>
>> This is the IR code that produces the assembler above:
>> ~~~
>> ; Function Attrs: inlinehint nounwind
>> define internal %struct._foo* @call(%struct._foo* %foo, i8* %key, i8* %value) #2 {
>> %1 = alloca %struct._foo*, align 8
>> %2 = alloca %struct._foo*, align 8
>> %3 = alloca i8*, align 8
>> %4 = alloca i8*, align 8
>> %dispatch = alloca %struct._dispatch*, align 8
>> %table = alloca %struct._table*, align 8
>> %entrys = alloca %struct._entry*, align 8
>> %entry = alloca %struct._entry*, align 8
>> %offset = alloca i64, align 8
>> %mask = alloca i64, align 8
>> %f = alloca %struct._foo* (%struct._foo*, i8*, i8*)*, align 8
>> store %struct._foo* %foo, %struct._foo** %2, align 8
>> store i8* %key, i8** %3, align 8
>> store i8* %value, i8** %4, align 8
>> %5 = load %struct._foo** %2, align 8
>> %6 = icmp ne %struct._foo* %5, null
>> br i1 %6, label %9, label %7
>>
>> ; <label>:7 ; preds = %0
>> %8 = load %struct._foo** %2, align 8
>> store %struct._foo* %8, %struct._foo** %1
>> br label %50
>>
>> ; <label>:9 ; preds = %0
>> %10 = load %struct._foo** %2, align 8
>> %11 = call %struct._dispatch** @_dispatch_p_from_foo(%struct._foo* %10)
>> %12 = load %struct._dispatch** %11, align 8
>> store %struct._dispatch* %12, %struct._dispatch** %dispatch, align 8
>> %13 = load %struct._dispatch** %dispatch, align 8
>> %14 = getelementptr inbounds %struct._dispatch* %13, i32 0, i32 0
>> %15 = load %struct._entry** %14, align 8
>> store %struct._entry* %15, %struct._entry** %entrys, align 8
>> %16 = load %struct._entry** %entrys, align 8
>> %17 = call %struct._table* @_table_from_entrys(%struct._entry* %16)
>> store %struct._table* %17, %struct._table** %table, align 8
>> %18 = load %struct._table** %table, align 8
>> %19 = getelementptr inbounds %struct._table* %18, i32 0, i32 2
>> %20 = load i64* %19, align 8
>> store i64 %20, i64* %mask, align 8
>> %21 = load i8** %3, align 8
>> %22 = ptrtoint i8* %21 to i64
>> %23 = load i64* %mask, align 8
>> %24 = and i64 %22, %23
>> store i64 %24, i64* %offset, align 8
>> %25 = load i64* %offset, align 8
>> %26 = load %struct._entry** %entrys, align 8
>> %27 = bitcast %struct._entry* %26 to i8*
>> %28 = getelementptr inbounds i8* %27, i64 %25
>> %29 = bitcast i8* %28 to %struct._entry*
>> store %struct._entry* %29, %struct._entry** %entry, align 8
>> %30 = load %struct._entry** %entry, align 8
>> %31 = getelementptr inbounds %struct._entry* %30, i32 0, i32 0
>> %32 = load i8** %31, align 8
>> %33 = load i8** %3, align 8
>> %34 = icmp eq i8* %32, %33
>> br i1 %34, label %35, label %39
>>
>> ; <label>:35 ; preds = %9
>> %36 = load %struct._entry** %entry, align 8
>> %37 = getelementptr inbounds %struct._entry* %36, i32 0, i32 1
>> %38 = load %struct._foo* (%struct._foo*, i8*, i8*)** %37, align 8
>> br label %43
>>
>> ; <label>:39 ; preds = %9
>> %40 = load %struct._dispatch** %dispatch, align 8
>> %41 = getelementptr inbounds %struct._dispatch* %40, i32 0, i32 1
>> %42 = load %struct._foo* (%struct._foo*, i8*, i8*)** %41, align 8
>> br label %43
>>
>> ; <label>:43 ; preds = %39, %35
>> %44 = phi %struct._foo* (%struct._foo*, i8*, i8*)* [ %38, %35 ], [ %42, %39 ]
>> store %struct._foo* (%struct._foo*, i8*, i8*)* %44, %struct._foo* (%struct._foo*, i8*, i8*)** %f, align 8
>> %45 = load %struct._foo* (%struct._foo*, i8*, i8*)** %f, align 8
>> %46 = load %struct._foo** %2, align 8
>> %47 = load i8** %3, align 8
>> %48 = load i8** %4, align 8
>> %49 = call %struct._foo* %45(%struct._foo* %46, i8* %47, i8* %48)
>> store %struct._foo* %49, %struct._foo** %1
>> br label %50
>>
>> ; <label>:50 ; preds = %43, %7
>> %51 = load %struct._foo** %1
>> ret %struct._foo* %51
>> }
>>
>> ~~~
>>
>> ------------------------------------------------------
>> When vanity and rivalry disappear, all the lines go
>> out of your stomach and you slow down and coast
>> slowly to a stop in the middle. -- DLR
>>
>>
>
>
------------------------------------------------------
Lemmings will hurl themselves at projects, promising
the world and then getting bored within a couple of
days as they realise that enthusiasm, good will, and
desperation do not produce much code by themselves.
-- H. Suleiman