<div dir="ltr">AFAIK, there haven't been any changes in this area. It's in my bug queue (which only seems to get longer!), but I don't have immediate plans to look at it.<br></div><div class="gmail_extra"><br><div class="gmail_quote">On Wed, Dec 7, 2016 at 10:13 AM, Nat! <span dir="ltr">&lt;<a href="mailto:nat@mulle-kybernetik.com" target="_blank">nat@mulle-kybernetik.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Hello<br>

<br>

Has there been any progress on this topic? The 3.9 optimizer output is still the same; I just checked.<br>

<br>

<a href="https://llvm.org/bugs/show_bug.cgi?id=24448" rel="noreferrer" target="_blank">https://llvm.org/bugs/show_bug<wbr>.cgi?id=24448</a><br>

<br>

Ciao<br>

   Nat!<br>

<br>

<br>

<br>

Sanjay Patel schrieb:<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class="">

[cc'ing Zia]<br>

<br>

We have this transform with -Os for some cases after:<br>

<a href="http://reviews.llvm.org/rL244601" rel="noreferrer" target="_blank">http://reviews.llvm.org/rL2446<wbr>01</a><br>

<a href="http://reviews.llvm.org/D11363" rel="noreferrer" target="_blank">http://reviews.llvm.org/D11363</a><br>

<br>

but something in this example is causing the transform to not trigger.<br>

<br>

I filed a related bug here:<br>

<a href="https://llvm.org/bugs/show_bug.cgi?id=24448" rel="noreferrer" target="_blank">https://llvm.org/bugs/show_bug<wbr>.cgi?id=24448</a><br>

<br>

If you can file your test case(s) in a bug report, that would be the<br>

best way to track progress on solving it. Thanks!<br>

<br>

<br>

On Thu, Feb 11, 2016 at 9:58 AM, Nat! via llvm-dev<br></span><div><div class="h5">

&lt;<a href="mailto:llvm-dev@lists.llvm.org" target="_blank">llvm-dev@lists.llvm.org</a> &lt;mailto:<a href="mailto:llvm-dev@lists.llvm.org" target="_blank">llvm-dev@lists.llvm.or<wbr>g</a>&gt;&gt; wrote:<br>

<br>

    Hi<br>

<br>

    the appended IR code does not optimize to my liking :)<br>

<br>

    this is the interesting part in x86_64, that got produced via clang -Os:<br>

    ---<br>

             movq    -16(%r12), %rax<br>

             movl    -4(%rax), %ecx<br>

             andl    $2298949, %ecx          ## imm = 0x231445<br>

             cmpq    $2298949, (%rax,%rcx)   ## imm = 0x231445<br>

             leaq    8(%rax,%rcx), %rax<br>

             cmovneq %r15, %rax<br>

             movl    $2298949, %esi          ## imm = 0x231445<br>

             movq    %r12, %rdi<br>

             movq    %r14, %rdx<br>

             callq   *(%rax)<br>

    ---<br>

<br>

<br>

    and clang -O3:<br>

    ---<br>

             movq    -16(%r12), %rax<br>

             movl    -4(%rax), %ecx<br>

             andl    $2298949, %ecx          ## imm = 0x231445<br>

             cmpl    $2298949, (%rax,%rcx)   ## imm = 0x231445<br>

             jne     LBB1_4<br>

             leaq    8(%rax,%rcx), %rax<br>

             jmp     LBB1_5<br>

             .align  4, 0x90<br>

    LBB1_4:<br>

             movq    %r15, %rax<br>

    LBB1_5:<br>

             movl    $2298949, %esi          ## imm = 0x231445<br>

             movq    %r12, %rdi<br>

             movq    %r14, %rdx<br>

             callq   *(%rax)<br>

    ---<br>

<br>

    As you can see in both cases the constant $2298949 is replicated 3<br>

    times. I would have expected something like the following code at<br>

    least for -Os:<br>

<br>

    ---<br>

             movq    -16(%r12), %rax<br>

             movl    $2298949, %esi          ### **** move on up<br>

             movl    -4(%rax), %ecx<br>

             andl    %esi, %ecx              ###<br>

             cmpl    %esi, (%rax,%rcx)       ###<br>

             leaq    8(%rax,%rcx), %rax<br>

             cmovneq %r15, %rax<br>

             movq    %r12, %rdi<br>

             movq    %r14, %rdx<br>

             callq   *(%rax)<br>

    ---<br>

<br>

    It is much shorter (33 bytes vs. 42 bytes) and I would assume at<br>

    least the same speed or better. This is with llvm 3.7.0. And yes<br>

    this pains me at the moment :)<br>

<br>

    Ciao<br>

        Nat!<br>

    ----<br>

    ; ModuleID = 'optimize-fail.c'<br>

    target datalayout = "e-m:o-i64:64-f80:128-n8:16:32<wbr>:64-S128"<br>

    target triple = "x86_64-apple-macosx10.10.0"<br>

<br>

    %struct._foo = type {}<br>

    %struct._entry = type { i32, i32, i8* (%struct._foo*, i32, i8*)* }<br>

    %struct._table = type { i64, i32, i32, [1 x %struct._entry] }<br>

    %struct.test = type { %struct.__foo, i32 }<br>

    %struct.__foo = type { %struct._dispatch }<br>

    %struct._dispatch = type { %struct._entry*, i8* (%struct._foo*, i32,<br>

    i8*)* }<br>

<br>

    @str = private unnamed_addr constant [8 x i8] c"table_f\00"<br>

    @str.2 = private unnamed_addr constant [11 x i8] c"dispatch_f\00"<br>

<br>

    ; Function Attrs: nounwind<br>

    declare void @llvm.lifetime.start(i64, i8* nocapture) #1<br>

<br>

    ; Function Attrs: nounwind<br>

    declare void @llvm.lifetime.end(i64, i8* nocapture) #1<br>

<br>

    ; Function Attrs: noinline nounwind ssp uwtable<br>

    define i8* @foo(%struct._foo* %obj, i32 %unused, i8* %value) #2 {<br>

    entry:<br>

       %tobool.i = icmp eq %struct._foo* %obj, null<br>

       %0 = bitcast %struct._foo* %obj to i8*<br>

       %arrayidx.i.i = getelementptr inbounds i8, i8* %0, i64 -16<br>

       %entries2.i = bitcast i8* %arrayidx.i.i to %struct._entry**<br>

       %f7.i = getelementptr inbounds i8, i8* %0, i64 -8<br>

       br i1 %tobool.i, label %for.end, label %call.exit.preheader<br>

<br>

    call.exit.preheader:                              ; preds = %entry<br>

       br label %call.exit<br>

<br>

    call.exit:                                        ; preds =<br>

    %call.exit.preheader, %call.exit<br>

       %i.04 = phi i32 [ %inc, %call.exit ], [ 0, %call.exit.preheader ]<br>

       %1 = load %struct._entry*, %struct._entry** %entries2.i, align 8,<br>

    !tbaa !2<br>

       %arrayidx1.i.i = getelementptr inbounds %struct._entry,<br>

    %struct._entry* %1, i64 -1<br>

       %2 = bitcast %struct._entry* %arrayidx1.i.i to %struct._table*<br>

       %mask4.i = getelementptr inbounds %struct._table, %struct._table*<br>

    %2, i64 0, i32 2<br>

       %3 = load i32, i32* %mask4.i, align 4, !tbaa !7<br>

       %and.i = and i32 %3, 2298949<br>

       %idxprom.i = zext i32 %and.i to i64<br>

       %4 = bitcast %struct._entry* %1 to i8*<br>

       %arrayidx.i = getelementptr inbounds i8, i8* %4, i64 %idxprom.i<br>

       %key5.i = bitcast i8* %arrayidx.i to i32*<br>

       %5 = load i32, i32* %key5.i, align 4, !tbaa !11<br>

       %cmp.i = icmp eq i32 %5, 2298949<br>

       %f6.i = getelementptr inbounds i8, i8* %arrayidx.i, i64 8<br>

       %cond.in.v.i = select i1 %cmp.i, i8* %f6.i, i8* %f7.i<br>

       %cond.in.i = bitcast i8* %cond.in.v.i to i8* (%struct._foo*, i32,<br>

    i8*)**<br>

       %cond.i = load i8* (%struct._foo*, i32, i8*)*, i8*<br>

    (%struct._foo*, i32, i8*)** %cond.in.i, align 8<br>

       %call8.i = tail call i8* %cond.i(%struct._foo* %obj, i32 2298949,<br>

    i8* %value) #1<br>

       %inc = add nuw nsw i32 %i.04, 1<br>

       %exitcond = icmp eq i32 %inc, 100<br>

       br i1 %exitcond, label %for.end.loopexit, label %call.exit<br>

<br>

    for.end.loopexit:                                 ; preds = %call.exit<br>

       %call8.i.lcssa = phi i8* [ %call8.i, %call.exit ]<br>

       br label %for.end<br>

<br>

    for.end:                                          ; preds =<br>

    %for.end.loopexit, %entry<br>

       %rval.0.lcssa = phi i8* [ %0, %entry ], [ %call8.i.lcssa,<br>

    %for.end.loopexit ]<br>

       ret i8* %rval.0.lcssa<br>

    }<br>

<br>

<br>

    attributes #1 = { nounwind }<br>

    attributes #2 = { noinline nounwind ssp uwtable<br>

    "disable-tail-calls"="false" "less-precise-fpmad"="false"<br>

    "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-lea<wbr>f"<br>

    "no-infs-fp-math"="false" "no-nans-fp-math"="false"<br>

    "stack-protector-buffer-size"=<wbr>"8" "target-cpu"="core2"<br>

    "target-features"="+cx16,+sse,<wbr>+sse2,+sse3,+ssse3"<br>

    "unsafe-fp-math"="false" "use-soft-float"="false" }<br>

<br>

    !llvm.module.flags = !{!0}<br>

    !llvm.ident = !{!1}<br>

<br>

    ---<br>

<br>

    ______________________________<wbr>_________________<br>

    LLVM Developers mailing list<br></div></div>

    <a href="mailto:llvm-dev@lists.llvm.org" target="_blank">llvm-dev@lists.llvm.org</a> &lt;mailto:<a href="mailto:llvm-dev@lists.llvm.org" target="_blank">llvm-dev@lists.llvm.or<wbr>g</a>&gt;<br>

    <a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-dev</a><br>

<br>

<br>

</blockquote>

<br>

</blockquote></div><br></div>