[llvm-dev] Expected constant simplification not happening

Nat! via llvm-dev llvm-dev at lists.llvm.org
Thu Feb 11 08:58:00 PST 2016


Hi

the appended IR code does not optimize to my liking :)

This is the interesting part of the x86_64 output produced via clang -Os:
---
	movq	-16(%r12), %rax
	movl	-4(%rax), %ecx
	andl	$2298949, %ecx          ## imm = 0x231445
	cmpq	$2298949, (%rax,%rcx)   ## imm = 0x231445
	leaq	8(%rax,%rcx), %rax
	cmovneq	%r15, %rax			
	movl	$2298949, %esi          ## imm = 0x231445
	movq	%r12, %rdi
	movq	%r14, %rdx
	callq	*(%rax)
---


and clang -O3:
---
	movq	-16(%r12), %rax
	movl	-4(%rax), %ecx
	andl	$2298949, %ecx          ## imm = 0x231445
	cmpl	$2298949, (%rax,%rcx)   ## imm = 0x231445
	jne	LBB1_4
	leaq	8(%rax,%rcx), %rax
	jmp	LBB1_5
	.align	4, 0x90
LBB1_4:
	movq	%r15, %rax
LBB1_5:
	movl	$2298949, %esi          ## imm = 0x231445
	movq	%r12, %rdi
	movq	%r14, %rdx
	callq	*(%rax)
---

As you can see, in both cases the constant $2298949 is encoded as an
immediate three times. I would have expected something like the following
code, at least for -Os:

---
	movq	-16(%r12), %rax
	movl	$2298949, %esi          ### **** move on up	
	movl	-4(%rax), %ecx
	andl	%esi, %ecx         	###
	cmpl	%esi, (%rax,%rcx)   	###
	leaq	8(%rax,%rcx), %rax
	cmovneq	%r15, %rax			
	movq	%r12, %rdi
	movq	%r14, %rdx
	callq	*(%rax)
---

It is much shorter (33 bytes vs. 42 bytes) and I would assume at least 
the same speed or better. This is with llvm 3.7.0. And yes this pains me 
at the moment :)

Ciao
    Nat!
----
; ModuleID = 'optimize-fail.c'
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"

; Empty object type. @foo never reads fields of it; it only addresses memory
; at negative byte offsets (-16 and -8) from a %struct._foo pointer.
%struct._foo = type {}
; Table entry: two i32 fields followed by a pointer to a function of type
; i8* (%struct._foo*, i32, i8*). @foo loads an i32 at the start of an entry
; (%key5.i) and a function pointer 8 bytes into it (%f6.i).
%struct._entry = type { i32, i32, i8* (%struct._foo*, i32, i8*)* }
; Table header followed by an inline array of entries. @foo reads field
; index 2 (an i32) through %mask4.i.
%struct._table = type { i64, i32, i32, [1 x %struct._entry] }
; The three types below are not referenced by name inside @foo.
; NOTE(review): %struct._dispatch (entry pointer + function pointer) is
; presumably what @foo reads at obj-16 / obj-8 -- confirm against the C source.
%struct.test = type { %struct.__foo, i32 }
%struct.__foo = type { %struct._dispatch }
%struct._dispatch = type { %struct._entry*, i8* (%struct._foo*, i32, i8*)* }

@str = private unnamed_addr constant [8 x i8] c"table_f\00"
@str.2 = private unnamed_addr constant [11 x i8] c"dispatch_f\00"

; Function Attrs: nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) #1

; Function Attrs: nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) #1

; Function Attrs: noinline nounwind ssp uwtable
;
; @foo -- reproducer for the repeated-immediate code generation.
;
;   %obj    : object pointer; when null the loop is skipped and %obj (as i8*)
;             is returned.
;   %unused : not referenced in the body.
;   %value  : forwarded unchanged as the third argument of every indirect call.
;
; For a non-null %obj the loop body runs 100 times. Each iteration:
;   1. loads an entry pointer from %obj - 16,
;   2. loads an i32 from field 2 of the %struct._table that starts one
;      %struct._entry before that pointer, and ANDs it with 2298949,
;   3. uses the result as a byte offset from the entry pointer and compares
;      the i32 found there with 2298949,
;   4. selects the function-pointer slot 8 bytes past that location on a
;      match, otherwise the slot at %obj - 8,
;   5. calls the loaded function pointer with (%obj, 2298949, %value).
; The result of the last call is returned.
;
; The constant 2298949 (0x231445) therefore appears three times: as the AND
; mask, as the compare operand, and as the second call argument.
;
; NOTE: the `!tbaa !2`, `!tbaa !7` and `!tbaa !11` attachments refer to
; metadata nodes that are not defined anywhere in this post; supply the
; definitions or strip the attachments before feeding this to opt/llc.
;
; Lines that the mail client had hard-wrapped (including two `; preds = ...`
; comments whose continuation landed on a non-comment line) are rejoined
; below; no instruction was otherwise changed.
define i8* @foo(%struct._foo* %obj, i32 %unused, i8* %value) #2 {
entry:
   %tobool.i = icmp eq %struct._foo* %obj, null
   %0 = bitcast %struct._foo* %obj to i8*
   ; Addresses of the two 8-byte slots immediately below the object.
   %arrayidx.i.i = getelementptr inbounds i8, i8* %0, i64 -16
   %entries2.i = bitcast i8* %arrayidx.i.i to %struct._entry**
   %f7.i = getelementptr inbounds i8, i8* %0, i64 -8
   br i1 %tobool.i, label %for.end, label %call.exit.preheader

call.exit.preheader:                              ; preds = %entry
   br label %call.exit

call.exit:                                        ; preds = %call.exit.preheader, %call.exit
   %i.04 = phi i32 [ %inc, %call.exit ], [ 0, %call.exit.preheader ]
   ; Entry pointer is reloaded every iteration (the indirect call below may
   ; have written memory).
   %1 = load %struct._entry*, %struct._entry** %entries2.i, align 8, !tbaa !2
   ; Step back one entry and reinterpret as the table header, then read
   ; field 2.
   %arrayidx1.i.i = getelementptr inbounds %struct._entry, %struct._entry* %1, i64 -1
   %2 = bitcast %struct._entry* %arrayidx1.i.i to %struct._table*
   %mask4.i = getelementptr inbounds %struct._table, %struct._table* %2, i64 0, i32 2
   %3 = load i32, i32* %mask4.i, align 4, !tbaa !7
   ; Use #1 of the constant: AND mask.
   %and.i = and i32 %3, 2298949
   %idxprom.i = zext i32 %and.i to i64
   ; The masked value is applied as a byte offset (i8 GEP), not an element
   ; index.
   %4 = bitcast %struct._entry* %1 to i8*
   %arrayidx.i = getelementptr inbounds i8, i8* %4, i64 %idxprom.i
   %key5.i = bitcast i8* %arrayidx.i to i32*
   %5 = load i32, i32* %key5.i, align 4, !tbaa !11
   ; Use #2 of the constant: 32-bit equality compare.
   %cmp.i = icmp eq i32 %5, 2298949
   %f6.i = getelementptr inbounds i8, i8* %arrayidx.i, i64 8
   ; Match -> slot 8 bytes past the probed location; otherwise the slot at
   ; %obj - 8.
   %cond.in.v.i = select i1 %cmp.i, i8* %f6.i, i8* %f7.i
   %cond.in.i = bitcast i8* %cond.in.v.i to i8* (%struct._foo*, i32, i8*)**
   %cond.i = load i8* (%struct._foo*, i32, i8*)*, i8* (%struct._foo*, i32, i8*)** %cond.in.i, align 8
   ; Use #3 of the constant: second argument of the indirect call.
   %call8.i = tail call i8* %cond.i(%struct._foo* %obj, i32 2298949, i8* %value) #1
   %inc = add nuw nsw i32 %i.04, 1
   %exitcond = icmp eq i32 %inc, 100
   br i1 %exitcond, label %for.end.loopexit, label %call.exit

for.end.loopexit:                                 ; preds = %call.exit
   %call8.i.lcssa = phi i8* [ %call8.i, %call.exit ]
   br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
   ; From %entry the result is %obj itself (null on that path); otherwise it
   ; is the value returned by the final indirect call.
   %rval.0.lcssa = phi i8* [ %0, %entry ], [ %call8.i.lcssa, %for.end.loopexit ]
   ret i8* %rval.0.lcssa
}


attributes #1 = { nounwind }
attributes #2 = { noinline nounwind ssp uwtable 
"disable-tail-calls"="false" "less-precise-fpmad"="false" 
"no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" 
"no-infs-fp-math"="false" "no-nans-fp-math"="false" 
"stack-protector-buffer-size"="8" "target-cpu"="core2" 
"target-features"="+cx16,+sse,+sse2,+sse3,+ssse3" 
"unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

---



More information about the llvm-dev mailing list