[llvm-dev] Code optimisation regression(?) in loops compiled with the -Oz flag
Joan Lluch via llvm-dev
llvm-dev at lists.llvm.org
Wed Sep 25 00:31:49 PDT 2019
Hi All,
This simple loop code
void loopTest()
{
for ( int i = 0 ; i<10 ; i++ ) {
ftest();
}
}
is converted into the following LLVM IR when compiled with the -Os flag:
; Function Attrs: nounwind optsize uwtable
define void @loopTest() local_unnamed_addr #0 {
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %for.body, %entry
%i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%call = tail call i32 (...) @ftest() #2
%inc = add nuw nsw i32 %i.03, 1
%exitcond = icmp eq i32 %inc, 10
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
For the x86-64 target, this IR is compiled into the following assembly:
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 12
.globl _loopTest ## -- Begin function loopTest
_loopTest: ## @loopTest
.cfi_startproc
## %bb.0: ## %entry
pushq %rbp ## encoding: [0x55]
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp ## encoding: [0x48,0x89,0xe5]
.cfi_def_cfa_register %rbp
pushq %rbx ## encoding: [0x53]
pushq %rax ## encoding: [0x50]
.cfi_offset %rbx, -24
movl $10, %ebx ## encoding: [0xbb,0x0a,0x00,0x00,0x00]
LBB0_1: ## %for.body
## =>This Inner Loop Header: Depth=1
xorl %eax, %eax ## encoding: [0x31,0xc0]
callq _ftest ## encoding: [0xe8,A,A,A,A]
## fixup A - offset: 1, value: _ftest-4, kind: reloc_branch_4byte_pcrel
decl %ebx ## encoding: [0xff,0xcb]
jne LBB0_1 ## encoding: [0x75,A]
## fixup A - offset: 1, value: LBB0_1-1, kind: FK_PCRel_1
## %bb.2: ## %for.cond.cleanup
addq $8, %rsp ## encoding: [0x48,0x83,0xc4,0x08]
popq %rbx ## encoding: [0x5b]
popq %rbp ## encoding: [0x5d]
retq ## encoding: [0xc3]
.cfi_endproc
## -- End function
The same code compiled with the -Oz flag results in the following LLVM IR:
; Function Attrs: minsize nounwind optsize uwtable
define void @loopTest() local_unnamed_addr #0 {
entry:
br label %for.cond
for.cond: ; preds = %for.body, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%exitcond = icmp eq i32 %i.0, 10
br i1 %exitcond, label %for.cond.cleanup, label %for.body
for.cond.cleanup: ; preds = %for.cond
ret void
for.body: ; preds = %for.cond
%call = tail call i32 (...) @ftest() #2
%inc = add nuw nsw i32 %i.0, 1
br label %for.cond
}
and the following x86-64 assembly:
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 12
.globl _loopTest ## -- Begin function loopTest
_loopTest: ## @loopTest
.cfi_startproc
## %bb.0: ## %entry
pushq %rbp ## encoding: [0x55]
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp ## encoding: [0x48,0x89,0xe5]
.cfi_def_cfa_register %rbp
pushq %rbx ## encoding: [0x53]
pushq %rax ## encoding: [0x50]
.cfi_offset %rbx, -24
pushq $10 ## encoding: [0x6a,0x0a]
popq %rbx ## encoding: [0x5b]
LBB0_1: ## %for.cond
## =>This Inner Loop Header: Depth=1
testl %ebx, %ebx ## encoding: [0x85,0xdb]
je LBB0_2 ## encoding: [0x74,A]
## fixup A - offset: 1, value: LBB0_2-1, kind: FK_PCRel_1
## %bb.3: ## %for.body
## in Loop: Header=BB0_1 Depth=1
xorl %eax, %eax ## encoding: [0x31,0xc0]
callq _ftest ## encoding: [0xe8,A,A,A,A]
## fixup A - offset: 1, value: _ftest-4, kind: reloc_branch_4byte_pcrel
decl %ebx ## encoding: [0xff,0xcb]
jmp LBB0_1 ## encoding: [0xeb,A]
## fixup A - offset: 1, value: LBB0_1-1, kind: FK_PCRel_1
LBB0_2: ## %for.cond.cleanup
addq $8, %rsp ## encoding: [0x48,0x83,0xc4,0x08]
popq %rbx ## encoding: [0x5b]
popq %rbp ## encoding: [0x5d]
retq ## encoding: [0xc3]
.cfi_endproc
## -- End function
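The loop generated for -Oz is longer than the one generated for -Os. This is because the loop exit comparison is performed at the top of the loop, which requires an additional jump instruction (a `testl`/`je` pair in the loop header plus an unconditional `jmp` back to it, rather than a single `jne` following the `decl`) and misses the opportunity to fold the exit condition into the induction variable decrement.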
I believe this is a regression relative to version 7.0, as I don’t recall having seen this behaviour before. It affects most targets.
Should I create a bug report for this?
Thanks.
John
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20190925/0a17ac1e/attachment.html>
More information about the llvm-dev
mailing list