[llvm-bugs] [Bug 36993] New: Opportunity to reduce branch usage by merging conditional checks
via llvm-bugs
llvm-bugs at lists.llvm.org
Tue Apr 3 13:12:23 PDT 2018
https://bugs.llvm.org/show_bug.cgi?id=36993
Bug ID: 36993
Summary: Opportunity to reduce branch usage by merging
conditional checks
Product: libraries
Version: trunk
Hardware: PC
OS: Linux
Status: NEW
Severity: enhancement
Priority: P
Component: Backend: X86
Assignee: unassignedbugs at nondot.org
Reporter: listmail at philipreames.com
CC: llvm-bugs at lists.llvm.org
I noticed that if we have two adjacent conditions of the form typical for
optimized range checks (a lt test between two known positive numbers), we can
rewrite the test by rewriting in forms of a subtract/and idiom. The basic idea
is that we know a LT test fails for two positive numbers exactly when the
result of (RHS-LHS) is less than zero. (We know the subtract can't wrap
through INT_MIN since both are positive.) Beyond LT tests, this can also be
used to merge adjacent null tests as well (since for non-kernel pointers, both
are positive).
The net result is that we can use one fewer branch instruction to perform the
pair of comparisons at the cost of an extra live register through the sub/and
sequence. The tradeoff here is not obvious, but this may be a useful idiom for
reducing pressure on branch resources.
The IR below was hand massaged to produce the assembly I think we should be
emitting here. "test" if our current default. "alt" is the proposed
alternative.
define i1 @test(i32* %p0) {
entry:
%p1 = getelementptr i32, i32* %p0, i32 1
%p2 = getelementptr i32, i32* %p0, i32 2
%p3 = getelementptr i32, i32* %p0, i32 3
%v0 = load i32, i32* %p0, !range !{ i32 0, i32 1024 }
%v1 = load i32, i32* %p1, !range !{ i32 0, i32 1024 }
%v2 = load i32, i32* %p2, !range !{ i32 0, i32 1024 }
%v3 = load i32, i32* %p3, !range !{ i32 0, i32 1024 }
%chk1 = icmp ult i32 %v0, %v1
%chk2 = icmp ult i32 %v2, %v3
%and = and i1 %chk1, %chk2
ret i1 %and
}
define i1 @alt(i32* %p0) {
entry:
%p1 = getelementptr i32, i32* %p0, i32 1
%p2 = getelementptr i32, i32* %p0, i32 2
%p3 = getelementptr i32, i32* %p0, i32 3
%v0 = load i32, i32* %p0, !range !{ i32 0, i32 1024 }
%v1 = load i32, i32* %p1, !range !{ i32 0, i32 1024 }
%v2 = load i32, i32* %p2, !range !{ i32 0, i32 1024 }
%v3 = load i32, i32* %p3, !range !{ i32 0, i32 1024 }
%sub1 = sub i32 %v1, %v0
%sub2 = sub i32 %v3, %v2
%and = and i32 %sub1, %sub2
%cmp = icmp slt i32 %and, 0
ret i1 %cmp
}
declare void @foo()
define void @test_br(i32* %p0) {
entry:
%p1 = getelementptr i32, i32* %p0, i32 1
%p2 = getelementptr i32, i32* %p0, i32 2
%p3 = getelementptr i32, i32* %p0, i32 3
%v0 = load i32, i32* %p0, !range !{ i32 0, i32 1024 }
%v1 = load i32, i32* %p1, !range !{ i32 0, i32 1024 }
%v2 = load i32, i32* %p2, !range !{ i32 0, i32 1024 }
%v3 = load i32, i32* %p3, !range !{ i32 0, i32 1024 }
%chk1 = icmp ult i32 %v0, %v1
%chk2 = icmp ult i32 %v2, %v3
%and = and i1 %chk1, %chk2
br i1 %and, label %taken, label %merge
taken:
call void @foo()
br label %merge
merge:
ret void
}
define void @alt_br(i32* %p0) {
entry:
%p1 = getelementptr i32, i32* %p0, i32 1
%p2 = getelementptr i32, i32* %p0, i32 2
%p3 = getelementptr i32, i32* %p0, i32 3
%v0 = load i32, i32* %p0, !range !{ i32 0, i32 1024 }
%v1 = load i32, i32* %p1, !range !{ i32 0, i32 1024 }
%v2 = load i32, i32* %p2, !range !{ i32 0, i32 1024 }
%v3 = load i32, i32* %p3, !range !{ i32 0, i32 1024 }
%sub1 = sub i32 %v1, %v0
%sub2 = sub i32 %v3, %v2
%and = and i32 %sub1, %sub2
%cmp = icmp slt i32 %and, 0
br i1 %cmp, label %taken, label %merge
taken:
call void @foo()
br label %merge
merge:
ret void
}
Output (haswell, but pretty similar on all micros tried)
.text
.file "<stdin>"
.globl test # -- Begin function test
.p2align 4, 0x90
.type test, at function
test: # @test
.cfi_startproc
# %bb.0: # %entry
movl (%rdi), %eax
movl 8(%rdi), %ecx
cmpl 4(%rdi), %eax
setb %dl
cmpl 12(%rdi), %ecx
setb %al
andb %dl, %al
retq
.Lfunc_end0:
.size test, .Lfunc_end0-test
.cfi_endproc
# -- End function
.globl alt # -- Begin function alt
.p2align 4, 0x90
.type alt, at function
alt: # @alt
.cfi_startproc
# %bb.0: # %entry
movl 4(%rdi), %ecx
movl 12(%rdi), %eax
subl (%rdi), %ecx
subl 8(%rdi), %eax
andl %ecx, %eax
shrl $31, %eax
# kill: def $al killed $al killed $eax
retq
.Lfunc_end1:
.size alt, .Lfunc_end1-alt
.cfi_endproc
# -- End function
.globl test_br # -- Begin function test_br
.p2align 4, 0x90
.type test_br, at function
test_br: # @test_br
.cfi_startproc
# %bb.0: # %entry
pushq %rax
.cfi_def_cfa_offset 16
movl (%rdi), %eax
cmpl 4(%rdi), %eax
jae .LBB2_3
# %bb.1: # %entry
movl 12(%rdi), %eax
cmpl %eax, 8(%rdi)
jae .LBB2_3
# %bb.2: # %taken
callq foo
.LBB2_3: # %merge
popq %rax
retq
.Lfunc_end2:
.size test_br, .Lfunc_end2-test_br
.cfi_endproc
# -- End function
.globl alt_br # -- Begin function alt_br
.p2align 4, 0x90
.type alt_br, at function
alt_br: # @alt_br
.cfi_startproc
# %bb.0: # %entry
pushq %rax
.cfi_def_cfa_offset 16
movl 4(%rdi), %eax
subl (%rdi), %eax
movl 12(%rdi), %ecx
subl 8(%rdi), %ecx
testl %ecx, %eax
js .LBB3_1
# %bb.2: # %merge
popq %rax
retq
.LBB3_1: # %taken
callq foo
popq %rax
retq
.Lfunc_end3:
.size alt_br, .Lfunc_end3-alt_br
.cfi_endproc
# -- End function
.section ".note.GNU-stack","", at progbits
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20180403/f1fc3474/attachment-0001.html>
More information about the llvm-bugs
mailing list