[llvm-dev] Expected constant simplification not happening
Ansari, Zia via llvm-dev
llvm-dev at lists.llvm.org
Fri Feb 12 11:50:56 PST 2016
I took a quick look at this and relaxed the conditions under which we prevent immediate subsumption (specifically, optsize and instruction shape), and I managed to merge 2 of those immediates. I need to dig in a little more to see why it didn’t catch the third. I suspect a phase ordering issue.
I’ll take a closer look at this as soon as I get a chance. If you could please attach a testcase to the bug report, I’d appreciate it.
Thanks,
Zia.
From: Sanjay Patel [mailto:spatel at rotateright.com]
Sent: Thursday, February 11, 2016 3:02 PM
To: Nat! <nat at mulle-kybernetik.com>
Cc: llvm-dev <llvm-dev at lists.llvm.org>; Ansari, Zia <zia.ansari at intel.com>
Subject: Re: [llvm-dev] Expected constant simplification not happening
[cc'ing Zia]
We have had this transform at -Os, for some cases, since:
http://reviews.llvm.org/rL244601
http://reviews.llvm.org/D11363
but something in this example is causing the transform to not trigger.
I filed a related bug here:
https://llvm.org/bugs/show_bug.cgi?id=24448
If you can file your test case(s) in a bug report, that would be the best way to track progress on solving it. Thanks!
On Thu, Feb 11, 2016 at 9:58 AM, Nat! via llvm-dev <llvm-dev at lists.llvm.org> wrote:
Hi
the appended IR code does not optimize to my liking :)
this is the interesting part in x86_64, that got produced via clang -Os:
---
movq -16(%r12), %rax
movl -4(%rax), %ecx
andl $2298949, %ecx ## imm = 0x231445
cmpl $2298949, (%rax,%rcx) ## imm = 0x231445
leaq 8(%rax,%rcx), %rax
cmovneq %r15, %rax
movl $2298949, %esi ## imm = 0x231445
movq %r12, %rdi
movq %r14, %rdx
callq *(%rax)
---
and clang -O3:
---
movq -16(%r12), %rax
movl -4(%rax), %ecx
andl $2298949, %ecx ## imm = 0x231445
cmpl $2298949, (%rax,%rcx) ## imm = 0x231445
jne LBB1_4
leaq 8(%rax,%rcx), %rax
jmp LBB1_5
.align 4, 0x90
LBB1_4:
movq %r15, %rax
LBB1_5:
movl $2298949, %esi ## imm = 0x231445
movq %r12, %rdi
movq %r14, %rdx
callq *(%rax)
---
As you can see, in both cases the constant $2298949 is replicated 3 times. I would have expected something like the following code, at least for -Os:
---
movq -16(%r12), %rax
movl $2298949, %esi ### **** move on up
movl -4(%rax), %ecx
andl %esi, %ecx ###
cmpl %esi, (%rax,%rcx) ###
leaq 8(%rax,%rcx), %rax
cmovneq %r15, %rax
movq %r12, %rdi
movq %r14, %rdx
callq *(%rax)
---
It is much shorter (33 bytes vs. 42 bytes) and I would assume at least the same speed or better. This is with llvm 3.7.0. And yes this pains me at the moment :)
Ciao
Nat!
----
; ModuleID = 'optimize-fail.c'
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
%struct._foo = type {}
%struct._entry = type { i32, i32, i8* (%struct._foo*, i32, i8*)* }
%struct._table = type { i64, i32, i32, [1 x %struct._entry] }
%struct.test = type { %struct.__foo, i32 }
%struct.__foo = type { %struct._dispatch }
%struct._dispatch = type { %struct._entry*, i8* (%struct._foo*, i32, i8*)* }
@str = private unnamed_addr constant [8 x i8] c"table_f\00"
@str.2 = private unnamed_addr constant [11 x i8] c"dispatch_f\00"
; Function Attrs: nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) #1
; Function Attrs: nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) #1
; Function Attrs: noinline nounwind ssp uwtable
define i8* @foo(%struct._foo* %obj, i32 %unused, i8* %value) #2 {
entry:
%tobool.i = icmp eq %struct._foo* %obj, null
%0 = bitcast %struct._foo* %obj to i8*
%arrayidx.i.i = getelementptr inbounds i8, i8* %0, i64 -16
%entries2.i = bitcast i8* %arrayidx.i.i to %struct._entry**
%f7.i = getelementptr inbounds i8, i8* %0, i64 -8
br i1 %tobool.i, label %for.end, label %call.exit.preheader
call.exit.preheader: ; preds = %entry
br label %call.exit
call.exit: ; preds = %call.exit.preheader, %call.exit
%i.04 = phi i32 [ %inc, %call.exit ], [ 0, %call.exit.preheader ]
%1 = load %struct._entry*, %struct._entry** %entries2.i, align 8, !tbaa !2
%arrayidx1.i.i = getelementptr inbounds %struct._entry, %struct._entry* %1, i64 -1
%2 = bitcast %struct._entry* %arrayidx1.i.i to %struct._table*
%mask4.i = getelementptr inbounds %struct._table, %struct._table* %2, i64 0, i32 2
%3 = load i32, i32* %mask4.i, align 4, !tbaa !7
%and.i = and i32 %3, 2298949
%idxprom.i = zext i32 %and.i to i64
%4 = bitcast %struct._entry* %1 to i8*
%arrayidx.i = getelementptr inbounds i8, i8* %4, i64 %idxprom.i
%key5.i = bitcast i8* %arrayidx.i to i32*
%5 = load i32, i32* %key5.i, align 4, !tbaa !11
%cmp.i = icmp eq i32 %5, 2298949
%f6.i = getelementptr inbounds i8, i8* %arrayidx.i, i64 8
%cond.in.v.i = select i1 %cmp.i, i8* %f6.i, i8* %f7.i
%cond.in.i = bitcast i8* %cond.in.v.i to i8* (%struct._foo*, i32, i8*)**
%cond.i = load i8* (%struct._foo*, i32, i8*)*, i8* (%struct._foo*, i32, i8*)** %cond.in.i, align 8
%call8.i = tail call i8* %cond.i(%struct._foo* %obj, i32 2298949, i8* %value) #1
%inc = add nuw nsw i32 %i.04, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.end.loopexit, label %call.exit
for.end.loopexit: ; preds = %call.exit
%call8.i.lcssa = phi i8* [ %call8.i, %call.exit ]
br label %for.end
for.end: ; preds = %for.end.loopexit, %entry
%rval.0.lcssa = phi i8* [ %0, %entry ], [ %call8.i.lcssa, %for.end.loopexit ]
ret i8* %rval.0.lcssa
}
attributes #1 = { nounwind }
attributes #2 = { noinline nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
---
_______________________________________________
LLVM Developers mailing list
llvm-dev at lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20160212/7a5d1fdf/attachment.html>
More information about the llvm-dev mailing list