[llvm-dev] Expected constant simplification not happening

Tue Dec 13 10:48:27 PST 2016

AFAIK, there haven't been any changes in this area. It's in my bug queue
(which only seems to get longer!), but I don't have immediate plans to look
at it.

On Wed, Dec 7, 2016 at 10:13 AM, Nat! <nat at mulle-kybernetik.com> wrote:

> Hello
>
> Has there been any progress on this topic? The 3.9 optimizer output is
> still the same; I just checked.
>
> https://llvm.org/bugs/show_bug.cgi?id=24448
>
> Ciao
>    Nat!
>
>
>
> Sanjay Patel schrieb:
>
>> [cc'ing Zia]
>>
>> We have this transform with -Os for some cases after:
>> http://reviews.llvm.org/rL244601
>> http://reviews.llvm.org/D11363
>>
>> but something in this example is causing the transform to not trigger.
>>
>> I filed a related bug here:
>> https://llvm.org/bugs/show_bug.cgi?id=24448
>>
>> If you can file your test case(s) in a bug report, that would be the
>> best way to track progress on solving it. Thanks!
>>
>>
>> On Thu, Feb 11, 2016 at 9:58 AM, Nat! via llvm-dev
>> <llvm-dev at lists.llvm.org <mailto:llvm-dev at lists.llvm.org>> wrote:
>>
>>     Hi
>>
>>     the appended IR code does not optimize to my liking :)
>>
>>     this is the interesting part in x86_64 that got produced via
>>     clang -Os:
>>     ---
>>              movq    -16(%r12), %rax
>>              movl    -4(%rax), %ecx
>>              andl    $2298949, %ecx          ## imm = 0x231445
>>              cmpq    $2298949, (%rax,%rcx)   ## imm = 0x231445
>>              leaq    8(%rax,%rcx), %rax
>>              cmovneq %r15, %rax
>>              movl    $2298949, %esi          ## imm = 0x231445
>>              movq    %r12, %rdi
>>              movq    %r14, %rdx
>>              callq   *(%rax)
>>     ---
>>
>>
>>     and clang -O3:
>>     ---
>>              movq    -16(%r12), %rax
>>              movl    -4(%rax), %ecx
>>              andl    $2298949, %ecx          ## imm = 0x231445
>>              cmpl    $2298949, (%rax,%rcx)   ## imm = 0x231445
>>              jne     LBB1_4
>>              leaq    8(%rax,%rcx), %rax
>>              jmp     LBB1_5
>>              .align  4, 0x90
>>     LBB1_4:
>>              movq    %r15, %rax
>>     LBB1_5:
>>              movl    $2298949, %esi          ## imm = 0x231445
>>              movq    %r12, %rdi
>>              movq    %r14, %rdx
>>              callq   *(%rax)
>>     ---
>>
>>     As you can see in both cases the constant $2298949 is replicated 3
>>     times. I would have expected something like the following code at
>>     least for -Os:
>>
>>     ---
>>              movq    -16(%r12), %rax
>>              movl    $2298949, %esi          ### **** move on up
>>              movl    -4(%rax), %ecx
>>              andl    %esi, %ecx              ###
>>              cmpl    %esi, (%rax,%rcx)       ###
>>              leaq    8(%rax,%rcx), %rax
>>              cmovneq %r15, %rax
>>              movq    %r12, %rdi
>>              movq    %r14, %rdx
>>              callq   *(%rax)
>>     ---
>>
>>     It is much shorter (33 bytes vs. 42 bytes) and I would assume at
>>     least the same speed or better. This is with llvm 3.7.0. And yes
>>     this pains me at the moment :)
>>
>>     Ciao
>>         Nat!
>>     ----
>>     ; ModuleID = 'optimize-fail.c'
>>     target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
>>     target triple = "x86_64-apple-macosx10.10.0"
>>
>>     %struct._foo = type {}
>>     %struct._entry = type { i32, i32, i8* (%struct._foo*, i32, i8*)* }
>>     %struct._table = type { i64, i32, i32, [1 x %struct._entry] }
>>     %struct.test = type { %struct.__foo, i32 }
>>     %struct.__foo = type { %struct._dispatch }
>>     %struct._dispatch = type { %struct._entry*, i8* (%struct._foo*, i32,
>>     i8*)* }
>>
>>     @str = private unnamed_addr constant [8 x i8] c"table_f\00"
>>     @str.2 = private unnamed_addr constant [11 x i8] c"dispatch_f\00"
>>
>>     ; Function Attrs: nounwind
>>     declare void @llvm.lifetime.start(i64, i8* nocapture) #1
>>
>>     ; Function Attrs: nounwind
>>     declare void @llvm.lifetime.end(i64, i8* nocapture) #1
>>
>>     ; Function Attrs: noinline nounwind ssp uwtable
>>     define i8* @foo(%struct._foo* %obj, i32 %unused, i8* %value) #2 {
>>     entry:
>>        %tobool.i = icmp eq %struct._foo* %obj, null
>>        %0 = bitcast %struct._foo* %obj to i8*
>>        %arrayidx.i.i = getelementptr inbounds i8, i8* %0, i64 -16
>>        %entries2.i = bitcast i8* %arrayidx.i.i to %struct._entry**
>>        %f7.i = getelementptr inbounds i8, i8* %0, i64 -8
>>        br i1 %tobool.i, label %for.end, label %call.exit.preheader
>>
>>     call.exit.preheader:                              ; preds = %entry
>>        br label %call.exit
>>
>>     call.exit:                                        ; preds =
>>     %call.exit.preheader, %call.exit
>>        %i.04 = phi i32 [ %inc, %call.exit ], [ 0, %call.exit.preheader ]
>>        %1 = load %struct._entry*, %struct._entry** %entries2.i, align 8,
>>     !tbaa !2
>>        %arrayidx1.i.i = getelementptr inbounds %struct._entry,
>>     %struct._entry* %1, i64 -1
>>        %2 = bitcast %struct._entry* %arrayidx1.i.i to %struct._table*
>>        %mask4.i = getelementptr inbounds %struct._table, %struct._table*
>>     %2, i64 0, i32 2
>>        %3 = load i32, i32* %mask4.i, align 4, !tbaa !7
>>        %and.i = and i32 %3, 2298949
>>        %idxprom.i = zext i32 %and.i to i64
>>        %4 = bitcast %struct._entry* %1 to i8*
>>        %arrayidx.i = getelementptr inbounds i8, i8* %4, i64 %idxprom.i
>>        %key5.i = bitcast i8* %arrayidx.i to i32*
>>        %5 = load i32, i32* %key5.i, align 4, !tbaa !11
>>        %cmp.i = icmp eq i32 %5, 2298949
>>        %f6.i = getelementptr inbounds i8, i8* %arrayidx.i, i64 8
>>        %cond.in.v.i = select i1 %cmp.i, i8* %f6.i, i8* %f7.i
>>        %cond.in.i = bitcast i8* %cond.in.v.i to i8* (%struct._foo*, i32,
>>     i8*)**
>>        %cond.i = load i8* (%struct._foo*, i32, i8*)*, i8*
>>     (%struct._foo*, i32, i8*)** %cond.in.i, align 8
>>        %call8.i = tail call i8* %cond.i(%struct._foo* %obj, i32 2298949,
>>     i8* %value) #1
>>        %inc = add nuw nsw i32 %i.04, 1
>>        %exitcond = icmp eq i32 %inc, 100
>>        br i1 %exitcond, label %for.end.loopexit, label %call.exit
>>
>>     for.end.loopexit:                                 ; preds = %call.exit
>>        %call8.i.lcssa = phi i8* [ %call8.i, %call.exit ]
>>        br label %for.end
>>
>>     for.end:                                          ; preds =
>>     %for.end.loopexit, %entry
>>        %rval.0.lcssa = phi i8* [ %0, %entry ], [ %call8.i.lcssa,
>>     %for.end.loopexit ]
>>        ret i8* %rval.0.lcssa
>>     }
>>
>>
>>     attributes #1 = { nounwind }
>>     attributes #2 = { noinline nounwind ssp uwtable
>>     "disable-tail-calls"="false" "less-precise-fpmad"="false"
>>     "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"
>>     "no-infs-fp-math"="false" "no-nans-fp-math"="false"
>>     "stack-protector-buffer-size"="8" "target-cpu"="core2"
>>     "target-features"="+cx16,+sse,+sse2,+sse3,+ssse3"
>>     "unsafe-fp-math"="false" "use-soft-float"="false" }
>>
>>     !llvm.module.flags = !{!0}
>>     !llvm.ident = !{!1}
>>
>>     ---
>>
>>     _______________________________________________
>>     LLVM Developers mailing list
>>     llvm-dev at lists.llvm.org <mailto:llvm-dev at lists.llvm.org>
>>     http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
>>
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20161213/3eefa811/attachment.html>