[LLVMbugs] [Bug 2053] New: Inefficient code generated checking 32-bit multiply for signed overflow on x86
bugzilla-daemon at cs.uiuc.edu
bugzilla-daemon at cs.uiuc.edu
Sun Feb 17 07:20:04 PST 2008
http://llvm.org/bugs/show_bug.cgi?id=2053
Summary: Inefficient code generated checking 32-bit multiply for
signed overflow on x86
Product: new-bugs
Version: unspecified
Platform: PC
OS/Version: Linux
Status: NEW
Severity: enhancement
Priority: P2
Component: new bugs
AssignedTo: unassignedbugs at nondot.org
ReportedBy: sharparrow1 at yahoo.com
CC: llvmbugs at cs.uiuc.edu
The generated code on x86 for checking for signed overflow on a multiply the
obvious way is much longer than it needs to be.
Original C code:
int x(int a, int b) {long long prod = (long long)a*b; return prod > 0x7FFFFFFF
|| prod < (-0x7FFFFFFF-1);}
Generated LLVM IL (generated using clang -emit-llvm-bc | opt
-std-compile-opts):
define i32 @x(i32 %a, i32 %b) nounwind {
entry:
%conv = sext i32 %a to i64 ; <i64> [#uses=1]
%conv2 = sext i32 %b to i64 ; <i64> [#uses=1]
%mul = mul i64 %conv2, %conv ; <i64> [#uses=2]
%cmp = icmp sgt i64 %mul, 2147483647 ; <i1> [#uses=1]
br i1 %cmp, label %UnifiedReturnBlock, label %lor_rhs
lor_rhs: ; preds = %entry
%cmp6 = icmp slt i64 %mul, -2147483648 ; <i1> [#uses=1]
%phitmp = zext i1 %cmp6 to i32 ; <i32> [#uses=1]
ret i32 %phitmp
UnifiedReturnBlock: ; preds = %entry
ret i32 1
}
Code generated by llc:
x:
pushl %ebx
pushl %esi
movl 16(%esp), %eax
imull 12(%esp)
testl %eax, %eax
sets %cl
movzbw %cl, %cx
testl %edx, %edx
setg %bl
movzbw %bl, %si
cmove %cx, %si
movw %si, %cx
testb $1, %cl
je .LBB1_3 # lor_rhs
.LBB1_1: # UnifiedReturnBlock
movl $1, %eax
.LBB1_2: # UnifiedReturnBlock
popl %esi
popl %ebx
ret
.LBB1_3: # lor_rhs
cmpl $2147483648, %eax
setb %al
movzbw %al, %ax
cmpl $4294967295, %edx
setl %cl
movzbw %cl, %cx
cmove %ax, %cx
movw %cx, %ax
andl $1, %eax
jmp .LBB1_2 # UnifiedReturnBlock
Code generated by gcc:
x:
movl 8(%esp), %eax
imull 4(%esp)
addl $-2147483648, %eax
adcl $0, %edx
xorl %eax, %eax
cmpl $0, %edx
seta %al
ret
LLVM seems to be able to deal with the following variant much more easily:
int x(int a, int b) {long long prod = (long long)a*b; return (prod >
0x7FFFFFFF) | (prod < (-0x7FFFFFFF-1));}
Note that this code uses "|" instead of "||".
Generated LLVM IL (generated using clang -emit-llvm-bc | opt
-std-compile-opts):
define i32 @x(i32 %a, i32 %b) nounwind {
entry:
%conv = sext i32 %a to i64 ; <i64> [#uses=1]
%conv2 = sext i32 %b to i64 ; <i64> [#uses=1]
%mul = mul i64 %conv2, %conv ; <i64> [#uses=1]
%mul.off = add i64 %mul, 2147483648 ; <i64> [#uses=1]
%or1 = icmp ugt i64 %mul.off, 4294967295 ; <i1>
[#uses=1]
%or = zext i1 %or1 to i32 ; <i32> [#uses=1]
ret i32 %or
}
Code generated by llc:
x:
movl 8(%esp), %eax
imull 4(%esp)
addl $2147483648, %eax
adcl $0, %edx
testl %edx, %edx
setne %al
movzbl %al, %eax
ret
Here's the ideal code as far as I know (handwritten by myself; not really
tested, but all it does is use the overflow flag set by imull):
x:
movl 8(%esp), %eax
imull 4(%esp)
setc %al
movzbl %al, %eax
ret
--
Configure bugmail: http://llvm.org/bugs/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are on the CC list for the bug.
More information about the llvm-bugs
mailing list